xref: /linux/kernel/trace/trace.c (revision ea8d7647f9ddf1f81e2027ed305299797299aa03)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 
54 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
55 
56 #include "trace.h"
57 #include "trace_output.h"
58 
59 #ifdef CONFIG_FTRACE_STARTUP_TEST
60 /*
61  * We need to change this state when a selftest is running.
 62  * A selftest will look into the ring buffer to count the
 63  * entries inserted during the selftest, although concurrent
 64  * insertions into the ring buffer, such as trace_printk(), could occur
 65  * at the same time, giving false positive or negative results.
66  */
67 static bool __read_mostly tracing_selftest_running;
68 
69 /*
70  * If boot-time tracing including tracers/events via kernel cmdline
71  * is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74 
75 void __init disable_tracing_selftest(const char *reason)
76 {
77 	if (!tracing_selftest_disabled) {
78 		tracing_selftest_disabled = true;
79 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
80 	}
81 }
82 #else
83 #define tracing_selftest_running	0
84 #define tracing_selftest_disabled	0
85 #endif
86 
87 /* Pipe tracepoints to printk */
88 static struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static bool traceoff_after_boot __initdata;
92 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
93 
94 /* For tracers that don't implement custom flags */
95 static struct tracer_opt dummy_tracer_opt[] = {
96 	{ }
97 };
98 
99 static int
100 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
101 {
102 	return 0;
103 }
104 
105 /*
106  * To prevent the comm cache from being overwritten when no
107  * tracing is active, only save the comm when a trace event
108  * occurred.
109  */
110 DEFINE_PER_CPU(bool, trace_taskinfo_save);
111 
112 /*
113  * Kill all tracing for good (never come back).
114  * It is initialized to 1 but will turn to zero if the initialization
115  * of the tracer is successful. But that is the only place that sets
116  * this back to zero.
117  */
118 static int tracing_disabled = 1;
119 
120 cpumask_var_t __read_mostly	tracing_buffer_mask;
121 
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
 128  * capturing traces that lead to crashes and outputting them to a
 129  * serial console.
 130  *
 131  * It is off by default, but you can enable it either by specifying
 132  * "ftrace_dump_on_oops" on the kernel command line, or by setting
 133  * /proc/sys/kernel/ftrace_dump_on_oops
 134  * Set 1 if you want to dump the buffers of all CPUs
 135  * Set 2 if you want to dump the buffer of the CPU that triggered the oops
 136  * Set an instance name if you want to dump a specific trace instance
 137  * Multiple instance dump is also supported, and instances are separated
 138  * by commas.
139  */
140 /* Set to the string "0" to disable by default */
141 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142 
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145 
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149 	struct module			*mod;
150 	unsigned long			length;
151 };
152 
153 union trace_eval_map_item;
154 
155 struct trace_eval_map_tail {
156 	/*
157 	 * "end" is first and points to NULL as it must be different
158 	 * than "mod" or "eval_string"
159 	 */
160 	union trace_eval_map_item	*next;
161 	const char			*end;	/* points to NULL */
162 };
163 
164 static DEFINE_MUTEX(trace_eval_mutex);
165 
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174 	struct trace_eval_map		map;
175 	struct trace_eval_map_head	head;
176 	struct trace_eval_map_tail	tail;
177 };
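/*
 * Illustrative layout of one saved array (N == head.length):
 *
 *   item[0]          head: { .mod, .length = N }
 *   item[1 .. N]     map:  the saved trace_eval_map entries
 *   item[N + 1]      tail: { .next -> next saved array, .end = NULL }
 */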
178 
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181 
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184 				   struct trace_buffer *buffer,
185 				   unsigned int trace_ctx);
186 
187 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
188 static char *default_bootup_tracer;
189 
190 static bool allocate_snapshot;
191 static bool snapshot_at_boot;
192 
193 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_instance_index;
195 
196 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
197 static int boot_snapshot_index;
198 
199 static int __init set_cmdline_ftrace(char *str)
200 {
201 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
202 	default_bootup_tracer = bootup_tracer_buf;
203 	/* We are using ftrace early, expand it */
204 	trace_set_ring_buffer_expanded(NULL);
205 	return 1;
206 }
207 __setup("ftrace=", set_cmdline_ftrace);
208 
209 int ftrace_dump_on_oops_enabled(void)
210 {
211 	if (!strcmp("0", ftrace_dump_on_oops))
212 		return 0;
213 	else
214 		return 1;
215 }
216 
217 static int __init set_ftrace_dump_on_oops(char *str)
218 {
219 	if (!*str) {
220 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
221 		return 1;
222 	}
223 
224 	if (*str == ',') {
225 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
226 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
227 		return 1;
228 	}
229 
230 	if (*str++ == '=') {
231 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
232 		return 1;
233 	}
234 
235 	return 0;
236 }
237 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
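/*
 * Illustrative command line forms and the strings they leave in
 * ftrace_dump_on_oops (see also the comment above that variable):
 *
 *   ftrace_dump_on_oops               stores "1"  (dump all CPUs)
 *   ftrace_dump_on_oops=2             stores "2"  (dump the oopsing CPU)
 *   ftrace_dump_on_oops=instance      stores "instance"
 *   ftrace_dump_on_oops,instance      stores "1,instance"
 */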
238 
239 static int __init stop_trace_on_warning(char *str)
240 {
241 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 		__disable_trace_on_warning = 1;
243 	return 1;
244 }
245 __setup("traceoff_on_warning", stop_trace_on_warning);
246 
247 static int __init boot_alloc_snapshot(char *str)
248 {
249 	char *slot = boot_snapshot_info + boot_snapshot_index;
250 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
251 	int ret;
252 
253 	if (str[0] == '=') {
254 		str++;
255 		if (strlen(str) >= left)
256 			return -1;
257 
258 		ret = snprintf(slot, left, "%s\t", str);
259 		boot_snapshot_index += ret;
260 	} else {
261 		allocate_snapshot = true;
262 		/* We also need the main ring buffer expanded */
263 		trace_set_ring_buffer_expanded(NULL);
264 	}
265 	return 1;
266 }
267 __setup("alloc_snapshot", boot_alloc_snapshot);
268 
269 
270 static int __init boot_snapshot(char *str)
271 {
272 	snapshot_at_boot = true;
273 	boot_alloc_snapshot(str);
274 	return 1;
275 }
276 __setup("ftrace_boot_snapshot", boot_snapshot);
277 
278 
279 static int __init boot_instance(char *str)
280 {
281 	char *slot = boot_instance_info + boot_instance_index;
282 	int left = sizeof(boot_instance_info) - boot_instance_index;
283 	int ret;
284 
285 	if (strlen(str) >= left)
286 		return -1;
287 
288 	ret = snprintf(slot, left, "%s\t", str);
289 	boot_instance_index += ret;
290 
291 	return 1;
292 }
293 __setup("trace_instance=", boot_instance);
294 
295 
296 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
297 
298 static int __init set_trace_boot_options(char *str)
299 {
300 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
301 	return 1;
302 }
303 __setup("trace_options=", set_trace_boot_options);
304 
305 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
306 static char *trace_boot_clock __initdata;
307 
308 static int __init set_trace_boot_clock(char *str)
309 {
310 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
311 	trace_boot_clock = trace_boot_clock_buf;
312 	return 1;
313 }
314 __setup("trace_clock=", set_trace_boot_clock);
315 
316 static int __init set_tracepoint_printk(char *str)
317 {
318 	/* Ignore the "tp_printk_stop_on_boot" param */
319 	if (*str == '_')
320 		return 0;
321 
322 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
323 		tracepoint_printk = 1;
324 	return 1;
325 }
326 __setup("tp_printk", set_tracepoint_printk);
327 
328 static int __init set_tracepoint_printk_stop(char *str)
329 {
330 	tracepoint_printk_stop_on_boot = true;
331 	return 1;
332 }
333 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
334 
335 static int __init set_traceoff_after_boot(char *str)
336 {
337 	traceoff_after_boot = true;
338 	return 1;
339 }
340 __setup("traceoff_after_boot", set_traceoff_after_boot);
341 
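/*
 * Convert nanoseconds to microseconds, rounded to the nearest
 * microsecond (e.g. 1499 ns -> 1 us, 1500 ns -> 2 us).
 */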
342 unsigned long long ns2usecs(u64 nsec)
343 {
344 	nsec += 500;
345 	do_div(nsec, 1000);
346 	return nsec;
347 }
348 
349 static void
350 trace_process_export(struct trace_export *export,
351 	       struct ring_buffer_event *event, int flag)
352 {
353 	struct trace_entry *entry;
354 	unsigned int size = 0;
355 
356 	if (export->flags & flag) {
357 		entry = ring_buffer_event_data(event);
358 		size = ring_buffer_event_length(event);
359 		export->write(export, entry, size);
360 	}
361 }
362 
363 static DEFINE_MUTEX(ftrace_export_lock);
364 
365 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
366 
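/*
 * One static key per export type. ftrace_exports_enable()/disable()
 * below increment/decrement the key for every registered export that
 * carries the matching flag, so callers can cheaply skip the export
 * path when no export of that type is registered.
 */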
367 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
368 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
369 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
370 
371 static inline void ftrace_exports_enable(struct trace_export *export)
372 {
373 	if (export->flags & TRACE_EXPORT_FUNCTION)
374 		static_branch_inc(&trace_function_exports_enabled);
375 
376 	if (export->flags & TRACE_EXPORT_EVENT)
377 		static_branch_inc(&trace_event_exports_enabled);
378 
379 	if (export->flags & TRACE_EXPORT_MARKER)
380 		static_branch_inc(&trace_marker_exports_enabled);
381 }
382 
383 static inline void ftrace_exports_disable(struct trace_export *export)
384 {
385 	if (export->flags & TRACE_EXPORT_FUNCTION)
386 		static_branch_dec(&trace_function_exports_enabled);
387 
388 	if (export->flags & TRACE_EXPORT_EVENT)
389 		static_branch_dec(&trace_event_exports_enabled);
390 
391 	if (export->flags & TRACE_EXPORT_MARKER)
392 		static_branch_dec(&trace_marker_exports_enabled);
393 }
394 
395 static void ftrace_exports(struct ring_buffer_event *event, int flag)
396 {
397 	struct trace_export *export;
398 
399 	preempt_disable_notrace();
400 
401 	export = rcu_dereference_raw_check(ftrace_exports_list);
402 	while (export) {
403 		trace_process_export(export, event, flag);
404 		export = rcu_dereference_raw_check(export->next);
405 	}
406 
407 	preempt_enable_notrace();
408 }
409 
410 static inline void
411 add_trace_export(struct trace_export **list, struct trace_export *export)
412 {
413 	rcu_assign_pointer(export->next, *list);
414 	/*
415 	 * We are adding export to the list, but another
416 	 * CPU might be walking that list. We need to make sure
417 	 * the export->next pointer is valid before another CPU sees
418 	 * the export pointer included in the list.
419 	 */
420 	rcu_assign_pointer(*list, export);
421 }
422 
423 static inline int
424 rm_trace_export(struct trace_export **list, struct trace_export *export)
425 {
426 	struct trace_export **p;
427 
428 	for (p = list; *p != NULL; p = &(*p)->next)
429 		if (*p == export)
430 			break;
431 
432 	if (*p != export)
433 		return -1;
434 
435 	rcu_assign_pointer(*p, (*p)->next);
436 
437 	return 0;
438 }
439 
440 static inline void
441 add_ftrace_export(struct trace_export **list, struct trace_export *export)
442 {
443 	ftrace_exports_enable(export);
444 
445 	add_trace_export(list, export);
446 }
447 
448 static inline int
449 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
450 {
451 	int ret;
452 
453 	ret = rm_trace_export(list, export);
454 	ftrace_exports_disable(export);
455 
456 	return ret;
457 }
458 
459 int register_ftrace_export(struct trace_export *export)
460 {
461 	if (WARN_ON_ONCE(!export->write))
462 		return -1;
463 
464 	mutex_lock(&ftrace_export_lock);
465 
466 	add_ftrace_export(&ftrace_exports_list, export);
467 
468 	mutex_unlock(&ftrace_export_lock);
469 
470 	return 0;
471 }
472 EXPORT_SYMBOL_GPL(register_ftrace_export);
473 
474 int unregister_ftrace_export(struct trace_export *export)
475 {
476 	int ret;
477 
478 	mutex_lock(&ftrace_export_lock);
479 
480 	ret = rm_ftrace_export(&ftrace_exports_list, export);
481 
482 	mutex_unlock(&ftrace_export_lock);
483 
484 	return ret;
485 }
486 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
487 
488 /* trace_flags holds trace_options default values */
489 #define TRACE_DEFAULT_FLAGS						\
490 	(FUNCTION_DEFAULT_FLAGS |					\
491 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
492 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
493 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
494 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
495 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
496 
497 /* trace_options that are only supported by global_trace */
498 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
499 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
500 
501 /* trace_flags that are default zero for instances */
502 #define ZEROED_TRACE_FLAGS \
503 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
504 
505 /*
506  * The global_trace is the descriptor that holds the top-level tracing
507  * buffers for the live tracing.
508  */
509 static struct trace_array global_trace = {
510 	.trace_flags = TRACE_DEFAULT_FLAGS,
511 };
512 
513 static struct trace_array *printk_trace = &global_trace;
514 
515 static __always_inline bool printk_binsafe(struct trace_array *tr)
516 {
517 	/*
518 	 * The binary format of trace_printk() can cause a crash if used
519 	 * by a buffer from another boot. Force the use of the
520 	 * non-binary version of trace_printk() if the trace_printk
521 	 * buffer is a boot-mapped ring buffer.
522 	 */
523 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
524 }
525 
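/*
 * Redirect trace_printk() output to @tr: clear TRACE_ITER_TRACE_PRINTK
 * on the previous printk_trace instance and set it on the new one.
 */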
526 static void update_printk_trace(struct trace_array *tr)
527 {
528 	if (printk_trace == tr)
529 		return;
530 
531 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
532 	printk_trace = tr;
533 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
534 }
535 
536 void trace_set_ring_buffer_expanded(struct trace_array *tr)
537 {
538 	if (!tr)
539 		tr = &global_trace;
540 	tr->ring_buffer_expanded = true;
541 }
542 
543 LIST_HEAD(ftrace_trace_arrays);
544 
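/*
 * Grab a reference on @this_tr, but only if it is still registered on
 * the ftrace_trace_arrays list.
 */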
545 int trace_array_get(struct trace_array *this_tr)
546 {
547 	struct trace_array *tr;
548 
549 	guard(mutex)(&trace_types_lock);
550 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
551 		if (tr == this_tr) {
552 			tr->ref++;
553 			return 0;
554 		}
555 	}
556 
557 	return -ENODEV;
558 }
559 
560 static void __trace_array_put(struct trace_array *this_tr)
561 {
562 	WARN_ON(!this_tr->ref);
563 	this_tr->ref--;
564 }
565 
566 /**
567  * trace_array_put - Decrement the reference counter for this trace array.
568  * @this_tr : pointer to the trace array
569  *
570  * NOTE: Use this when we no longer need the trace array returned by
571  * trace_array_get_by_name(). This ensures the trace array can be later
572  * destroyed.
573  *
574  */
575 void trace_array_put(struct trace_array *this_tr)
576 {
577 	if (!this_tr)
578 		return;
579 
580 	mutex_lock(&trace_types_lock);
581 	__trace_array_put(this_tr);
582 	mutex_unlock(&trace_types_lock);
583 }
584 EXPORT_SYMBOL_GPL(trace_array_put);
585 
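/*
 * Common open-time check: fail if tracefs is locked down, if tracing
 * has been disabled, or (when @tr is non-NULL) if a reference on the
 * trace array cannot be taken.
 */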
586 int tracing_check_open_get_tr(struct trace_array *tr)
587 {
588 	int ret;
589 
590 	ret = security_locked_down(LOCKDOWN_TRACEFS);
591 	if (ret)
592 		return ret;
593 
594 	if (tracing_disabled)
595 		return -ENODEV;
596 
597 	if (tr && trace_array_get(tr) < 0)
598 		return -ENODEV;
599 
600 	return 0;
601 }
602 
603 /**
604  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
605  * @filtered_pids: The list of pids to check
606  * @search_pid: The PID to find in @filtered_pids
607  *
608  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
609  */
610 bool
611 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
612 {
613 	return trace_pid_list_is_set(filtered_pids, search_pid);
614 }
615 
616 /**
617  * trace_ignore_this_task - should a task be ignored for tracing
618  * @filtered_pids: The list of pids to check
619  * @filtered_no_pids: The list of pids not to be traced
620  * @task: The task that should be ignored if not filtered
621  *
622  * Checks if @task should be traced or not from @filtered_pids.
623  * Returns true if @task should *NOT* be traced.
624  * Returns false if @task should be traced.
625  */
626 bool
627 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
628 		       struct trace_pid_list *filtered_no_pids,
629 		       struct task_struct *task)
630 {
631 	/*
632 	 * If filtered_no_pids is not empty, and the task's pid is listed
633 	 * in filtered_no_pids, then return true.
634 	 * Otherwise, if filtered_pids is empty, that means we can
635 	 * trace all tasks. If it has content, then only trace pids
636 	 * within filtered_pids.
637 	 */
638 
639 	return (filtered_pids &&
640 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
641 		(filtered_no_pids &&
642 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
643 }
644 
645 /**
646  * trace_filter_add_remove_task - Add or remove a task from a pid_list
647  * @pid_list: The list to modify
648  * @self: The current task for fork or NULL for exit
649  * @task: The task to add or remove
650  *
651  * When adding a task, if @self is defined, the task is only added if @self
652  * is also included in @pid_list. This happens on fork and tasks should
653  * only be added when the parent is listed. If @self is NULL, then the
654  * @task pid will be removed from the list, which would happen on exit
655  * of a task.
656  */
657 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
658 				  struct task_struct *self,
659 				  struct task_struct *task)
660 {
661 	if (!pid_list)
662 		return;
663 
664 	/* For forks, we only add if the forking task is listed */
665 	if (self) {
666 		if (!trace_find_filtered_pid(pid_list, self->pid))
667 			return;
668 	}
669 
670 	/* "self" is set for forks, and NULL for exits */
671 	if (self)
672 		trace_pid_list_set(pid_list, task->pid);
673 	else
674 		trace_pid_list_clear(pid_list, task->pid);
675 }
676 
677 /**
678  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
679  * @pid_list: The pid list to show
680  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
681  * @pos: The position of the file
682  *
683  * This is used by the seq_file "next" operation to iterate the pids
684  * listed in a trace_pid_list structure.
685  *
686  * Returns the pid+1 as we want to display pid of zero, but NULL would
687  * stop the iteration.
688  */
689 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
690 {
691 	long pid = (unsigned long)v;
692 	unsigned int next;
693 
694 	(*pos)++;
695 
696 	/* pid already is +1 of the actual previous bit */
697 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
698 		return NULL;
699 
700 	pid = next;
701 
702 	/* Return pid + 1 to allow zero to be represented */
703 	return (void *)(pid + 1);
704 }
705 
706 /**
707  * trace_pid_start - Used for seq_file to start reading pid lists
708  * @pid_list: The pid list to show
709  * @pos: The position of the file
710  *
711  * This is used by seq_file "start" operation to start the iteration
712  * of listing pids.
713  *
714  * Returns the pid+1 as we want to display pid of zero, but NULL would
715  * stop the iteration.
716  */
717 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
718 {
719 	unsigned long pid;
720 	unsigned int first;
721 	loff_t l = 0;
722 
723 	if (trace_pid_list_first(pid_list, &first) < 0)
724 		return NULL;
725 
726 	pid = first;
727 
728 	/* Return pid + 1 so that zero can be the exit value */
729 	for (pid++; pid && l < *pos;
730 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
731 		;
732 	return (void *)pid;
733 }
734 
735 /**
736  * trace_pid_show - show the current pid in seq_file processing
737  * @m: The seq_file structure to write into
738  * @v: A void pointer of the pid (+1) value to display
739  *
740  * Can be directly used by seq_file operations to display the current
741  * pid value.
742  */
743 int trace_pid_show(struct seq_file *m, void *v)
744 {
745 	unsigned long pid = (unsigned long)v - 1;
746 
747 	seq_printf(m, "%lu\n", pid);
748 	return 0;
749 }
750 
751 /* 128 (PID_BUF_SIZE + 1, as passed to the parser) should be much more than enough */
752 #define PID_BUF_SIZE		127
753 
754 int trace_pid_write(struct trace_pid_list *filtered_pids,
755 		    struct trace_pid_list **new_pid_list,
756 		    const char __user *ubuf, size_t cnt)
757 {
758 	struct trace_pid_list *pid_list;
759 	struct trace_parser parser;
760 	unsigned long val;
761 	int nr_pids = 0;
762 	ssize_t read = 0;
763 	ssize_t ret;
764 	loff_t pos;
765 	pid_t pid;
766 
767 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
768 		return -ENOMEM;
769 
770 	/*
771 	 * Always recreate a new list. The write is an all-or-nothing
772 	 * operation: a new list is always created when the user adds new
773 	 * pids. If the operation fails, then the current list is
774 	 * not modified.
775 	 */
776 	pid_list = trace_pid_list_alloc();
777 	if (!pid_list) {
778 		trace_parser_put(&parser);
779 		return -ENOMEM;
780 	}
781 
782 	if (filtered_pids) {
783 		/* copy the current bits to the new max */
784 		ret = trace_pid_list_first(filtered_pids, &pid);
785 		while (!ret) {
786 			trace_pid_list_set(pid_list, pid);
787 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
788 			nr_pids++;
789 		}
790 	}
791 
792 	ret = 0;
793 	while (cnt > 0) {
794 
795 		pos = 0;
796 
797 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
798 		if (ret < 0)
799 			break;
800 
801 		read += ret;
802 		ubuf += ret;
803 		cnt -= ret;
804 
805 		if (!trace_parser_loaded(&parser))
806 			break;
807 
808 		ret = -EINVAL;
809 		if (kstrtoul(parser.buffer, 0, &val))
810 			break;
811 
812 		pid = (pid_t)val;
813 
814 		if (trace_pid_list_set(pid_list, pid) < 0) {
815 			ret = -1;
816 			break;
817 		}
818 		nr_pids++;
819 
820 		trace_parser_clear(&parser);
821 		ret = 0;
822 	}
823 	trace_parser_put(&parser);
824 
825 	if (ret < 0) {
826 		trace_pid_list_free(pid_list);
827 		return ret;
828 	}
829 
830 	if (!nr_pids) {
831 		/* Cleared the list of pids */
832 		trace_pid_list_free(pid_list);
833 		pid_list = NULL;
834 	}
835 
836 	*new_pid_list = pid_list;
837 
838 	return read;
839 }
840 
841 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
842 {
843 	u64 ts;
844 
845 	/* Early boot up does not have a buffer yet */
846 	if (!buf->buffer)
847 		return trace_clock_local();
848 
849 	ts = ring_buffer_time_stamp(buf->buffer);
850 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
851 
852 	return ts;
853 }
854 
855 u64 ftrace_now(int cpu)
856 {
857 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
858 }
859 
860 /**
861  * tracing_is_enabled - Show if global_trace has been enabled
862  *
863  * Shows if the global trace has been enabled or not. It uses the
864  * mirror flag "buffer_disabled", which is meant for fast paths such as
865  * the irqsoff tracer, but it may be inaccurate due to races. If you
866  * need to know the accurate state, use tracing_is_on(), which is a little
867  * slower, but accurate.
868  */
869 int tracing_is_enabled(void)
870 {
871 	/*
872 	 * For quick access (irqsoff uses this in fast path), just
873 	 * return the mirror variable of the state of the ring buffer.
874 	 * It's a little racy, but we don't really care.
875 	 */
876 	smp_rmb();
877 	return !global_trace.buffer_disabled;
878 }
879 
880 /*
881  * trace_buf_size is the size in bytes that is allocated
882  * for a buffer. Note, the number of bytes is always rounded
883  * to page size.
884  *
885  * This number is purposely set to a low 16384 entries.
886  * If a dump on oops happens, it is much appreciated
887  * not to have to wait for all that output. Anyway, this is
888  * configurable at both boot time and run time.
889  */
890 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
891 
892 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
893 
894 /* trace_types holds a link list of available tracers. */
895 static struct tracer		*trace_types __read_mostly;
896 
897 /*
898  * trace_types_lock is used to protect the trace_types list.
899  */
900 DEFINE_MUTEX(trace_types_lock);
901 
902 /*
903  * serialize the access of the ring buffer
904  *
905  * ring buffer serializes readers, but it is low level protection.
906  * The validity of the events (which returns by ring_buffer_peek() ..etc)
907  * are not protected by ring buffer.
908  *
909  * The content of events may become garbage if we allow other process consumes
910  * these events concurrently:
911  *   A) the page of the consumed events may become a normal page
912  *      (not reader page) in ring buffer, and this page will be rewritten
913  *      by events producer.
914  *   B) The page of the consumed events may become a page for splice_read,
915  *      and this page will be returned to system.
916  *
917  * These primitives allow multi process access to different cpu ring buffer
918  * concurrently.
919  *
920  * These primitives don't distinguish read-only and read-consume access.
921  * Multi read-only access are also serialized.
922  */
923 
924 #ifdef CONFIG_SMP
925 static DECLARE_RWSEM(all_cpu_access_lock);
926 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
927 
928 static inline void trace_access_lock(int cpu)
929 {
930 	if (cpu == RING_BUFFER_ALL_CPUS) {
931 		/* gain it for accessing the whole ring buffer. */
932 		down_write(&all_cpu_access_lock);
933 	} else {
934 		/* gain it for accessing a cpu ring buffer. */
935 
936 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
937 		down_read(&all_cpu_access_lock);
938 
939 		/* Secondly block other access to this @cpu ring buffer. */
940 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
941 	}
942 }
943 
944 static inline void trace_access_unlock(int cpu)
945 {
946 	if (cpu == RING_BUFFER_ALL_CPUS) {
947 		up_write(&all_cpu_access_lock);
948 	} else {
949 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
950 		up_read(&all_cpu_access_lock);
951 	}
952 }
953 
954 static inline void trace_access_lock_init(void)
955 {
956 	int cpu;
957 
958 	for_each_possible_cpu(cpu)
959 		mutex_init(&per_cpu(cpu_access_lock, cpu));
960 }
961 
962 #else
963 
964 static DEFINE_MUTEX(access_lock);
965 
966 static inline void trace_access_lock(int cpu)
967 {
968 	(void)cpu;
969 	mutex_lock(&access_lock);
970 }
971 
972 static inline void trace_access_unlock(int cpu)
973 {
974 	(void)cpu;
975 	mutex_unlock(&access_lock);
976 }
977 
978 static inline void trace_access_lock_init(void)
979 {
980 }
981 
982 #endif
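/*
 * Typical reader-side pairing, as a sketch (see the seq_file and pipe
 * readers later in this file):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from that CPU's buffer
 *	    (or all of them, when cpu == RING_BUFFER_ALL_CPUS) ...
 *	trace_access_unlock(cpu);
 */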
983 
984 #ifdef CONFIG_STACKTRACE
985 static void __ftrace_trace_stack(struct trace_array *tr,
986 				 struct trace_buffer *buffer,
987 				 unsigned int trace_ctx,
988 				 int skip, struct pt_regs *regs);
989 static inline void ftrace_trace_stack(struct trace_array *tr,
990 				      struct trace_buffer *buffer,
991 				      unsigned int trace_ctx,
992 				      int skip, struct pt_regs *regs);
993 
994 #else
995 static inline void __ftrace_trace_stack(struct trace_array *tr,
996 					struct trace_buffer *buffer,
997 					unsigned int trace_ctx,
998 					int skip, struct pt_regs *regs)
999 {
1000 }
1001 static inline void ftrace_trace_stack(struct trace_array *tr,
1002 				      struct trace_buffer *buffer,
1003 				      unsigned long trace_ctx,
1004 				      int skip, struct pt_regs *regs)
1005 {
1006 }
1007 
1008 #endif
1009 
1010 static __always_inline void
1011 trace_event_setup(struct ring_buffer_event *event,
1012 		  int type, unsigned int trace_ctx)
1013 {
1014 	struct trace_entry *ent = ring_buffer_event_data(event);
1015 
1016 	tracing_generic_entry_update(ent, type, trace_ctx);
1017 }
1018 
1019 static __always_inline struct ring_buffer_event *
1020 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1021 			  int type,
1022 			  unsigned long len,
1023 			  unsigned int trace_ctx)
1024 {
1025 	struct ring_buffer_event *event;
1026 
1027 	event = ring_buffer_lock_reserve(buffer, len);
1028 	if (event != NULL)
1029 		trace_event_setup(event, type, trace_ctx);
1030 
1031 	return event;
1032 }
1033 
1034 void tracer_tracing_on(struct trace_array *tr)
1035 {
1036 	if (tr->array_buffer.buffer)
1037 		ring_buffer_record_on(tr->array_buffer.buffer);
1038 	/*
1039 	 * This flag is looked at when buffers haven't been allocated
1040 	 * yet, or by some tracers (like irqsoff) that just want to
1041 	 * know if the ring buffer has been disabled, but it can handle
1042 	 * races where it gets disabled but we still do a record.
1043 	 * As the check is in the fast path of the tracers, it is more
1044 	 * important to be fast than accurate.
1045 	 */
1046 	tr->buffer_disabled = 0;
1047 	/* Make the flag seen by readers */
1048 	smp_wmb();
1049 }
1050 
1051 /**
1052  * tracing_on - enable tracing buffers
1053  *
1054  * This function enables tracing buffers that may have been
1055  * disabled with tracing_off.
1056  */
1057 void tracing_on(void)
1058 {
1059 	tracer_tracing_on(&global_trace);
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_on);
1062 
1063 
1064 static __always_inline void
1065 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1066 {
1067 	__this_cpu_write(trace_taskinfo_save, true);
1068 
1069 	/* If this is the temp buffer, we need to commit fully */
1070 	if (this_cpu_read(trace_buffered_event) == event) {
1071 		/* Length is in event->array[0] */
1072 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1073 		/* Release the temp buffer */
1074 		this_cpu_dec(trace_buffered_event_cnt);
1075 		/* ring_buffer_unlock_commit() enables preemption */
1076 		preempt_enable_notrace();
1077 	} else
1078 		ring_buffer_unlock_commit(buffer);
1079 }
1080 
1081 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1082 		       const char *str, int size)
1083 {
1084 	struct ring_buffer_event *event;
1085 	struct trace_buffer *buffer;
1086 	struct print_entry *entry;
1087 	unsigned int trace_ctx;
1088 	int alloc;
1089 
1090 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1091 		return 0;
1092 
1093 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1094 		return 0;
1095 
1096 	if (unlikely(tracing_disabled))
1097 		return 0;
1098 
1099 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1100 
1101 	trace_ctx = tracing_gen_ctx();
1102 	buffer = tr->array_buffer.buffer;
1103 	ring_buffer_nest_start(buffer);
1104 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1105 					    trace_ctx);
1106 	if (!event) {
1107 		size = 0;
1108 		goto out;
1109 	}
1110 
1111 	entry = ring_buffer_event_data(event);
1112 	entry->ip = ip;
1113 
1114 	memcpy(&entry->buf, str, size);
1115 
1116 	/* Add a newline if necessary */
1117 	if (entry->buf[size - 1] != '\n') {
1118 		entry->buf[size] = '\n';
1119 		entry->buf[size + 1] = '\0';
1120 	} else
1121 		entry->buf[size] = '\0';
1122 
1123 	__buffer_unlock_commit(buffer, event);
1124 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1125  out:
1126 	ring_buffer_nest_end(buffer);
1127 	return size;
1128 }
1129 EXPORT_SYMBOL_GPL(__trace_array_puts);
1130 
1131 /**
1132  * __trace_puts - write a constant string into the trace buffer.
1133  * @ip:	   The address of the caller
1134  * @str:   The constant string to write
1135  * @size:  The size of the string.
1136  */
1137 int __trace_puts(unsigned long ip, const char *str, int size)
1138 {
1139 	return __trace_array_puts(printk_trace, ip, str, size);
1140 }
1141 EXPORT_SYMBOL_GPL(__trace_puts);
1142 
1143 /**
1144  * __trace_bputs - write the pointer to a constant string into trace buffer
1145  * @ip:	   The address of the caller
1146  * @str:   The constant string to write to the buffer
1147  */
1148 int __trace_bputs(unsigned long ip, const char *str)
1149 {
1150 	struct trace_array *tr = READ_ONCE(printk_trace);
1151 	struct ring_buffer_event *event;
1152 	struct trace_buffer *buffer;
1153 	struct bputs_entry *entry;
1154 	unsigned int trace_ctx;
1155 	int size = sizeof(struct bputs_entry);
1156 	int ret = 0;
1157 
1158 	if (!printk_binsafe(tr))
1159 		return __trace_puts(ip, str, strlen(str));
1160 
1161 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1162 		return 0;
1163 
1164 	if (unlikely(tracing_selftest_running || tracing_disabled))
1165 		return 0;
1166 
1167 	trace_ctx = tracing_gen_ctx();
1168 	buffer = tr->array_buffer.buffer;
1169 
1170 	ring_buffer_nest_start(buffer);
1171 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1172 					    trace_ctx);
1173 	if (!event)
1174 		goto out;
1175 
1176 	entry = ring_buffer_event_data(event);
1177 	entry->ip			= ip;
1178 	entry->str			= str;
1179 
1180 	__buffer_unlock_commit(buffer, event);
1181 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1182 
1183 	ret = 1;
1184  out:
1185 	ring_buffer_nest_end(buffer);
1186 	return ret;
1187 }
1188 EXPORT_SYMBOL_GPL(__trace_bputs);
1189 
1190 #ifdef CONFIG_TRACER_SNAPSHOT
1191 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1192 					   void *cond_data)
1193 {
1194 	struct tracer *tracer = tr->current_trace;
1195 	unsigned long flags;
1196 
1197 	if (in_nmi()) {
1198 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1199 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1200 		return;
1201 	}
1202 
1203 	if (!tr->allocated_snapshot) {
1204 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1205 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1206 		tracer_tracing_off(tr);
1207 		return;
1208 	}
1209 
1210 	/* Note, snapshot can not be used when the tracer uses it */
1211 	if (tracer->use_max_tr) {
1212 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1213 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1214 		return;
1215 	}
1216 
1217 	if (tr->mapped) {
1218 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1219 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1220 		return;
1221 	}
1222 
1223 	local_irq_save(flags);
1224 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1225 	local_irq_restore(flags);
1226 }
1227 
1228 void tracing_snapshot_instance(struct trace_array *tr)
1229 {
1230 	tracing_snapshot_instance_cond(tr, NULL);
1231 }
1232 
1233 /**
1234  * tracing_snapshot - take a snapshot of the current buffer.
1235  *
1236  * This causes a swap between the snapshot buffer and the current live
1237  * tracing buffer. You can use this to take snapshots of the live
1238  * trace when some condition is triggered, but continue to trace.
1239  *
1240  * Note, make sure to allocate the snapshot with either
1241  * a tracing_snapshot_alloc(), or by doing it manually
1242  * with: echo 1 > /sys/kernel/tracing/snapshot
1243  *
1244  * If the snapshot buffer is not allocated, it will stop tracing.
1245  * Basically making a permanent snapshot.
1246  */
1247 void tracing_snapshot(void)
1248 {
1249 	struct trace_array *tr = &global_trace;
1250 
1251 	tracing_snapshot_instance(tr);
1252 }
1253 EXPORT_SYMBOL_GPL(tracing_snapshot);
1254 
1255 /**
1256  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1257  * @tr:		The tracing instance to snapshot
1258  * @cond_data:	The data to be tested conditionally, and possibly saved
1259  *
1260  * This is the same as tracing_snapshot() except that the snapshot is
1261  * conditional - the snapshot will only happen if the
1262  * cond_snapshot.update() implementation receiving the cond_data
1263  * returns true, which means that the trace array's cond_snapshot
1264  * update() operation used the cond_data to determine whether the
1265  * snapshot should be taken, and if it was, presumably saved it along
1266  * with the snapshot.
1267  */
1268 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1269 {
1270 	tracing_snapshot_instance_cond(tr, cond_data);
1271 }
1272 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1273 
1274 /**
1275  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1276  * @tr:		The tracing instance
1277  *
1278  * When the user enables a conditional snapshot using
1279  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1280  * with the snapshot.  This accessor is used to retrieve it.
1281  *
1282  * Should not be called from cond_snapshot.update(), since it takes
1283  * the tr->max_lock lock, which the code calling
1284  * cond_snapshot.update() has already taken.
1285  *
1286  * Returns the cond_data associated with the trace array's snapshot.
1287  */
1288 void *tracing_cond_snapshot_data(struct trace_array *tr)
1289 {
1290 	void *cond_data = NULL;
1291 
1292 	local_irq_disable();
1293 	arch_spin_lock(&tr->max_lock);
1294 
1295 	if (tr->cond_snapshot)
1296 		cond_data = tr->cond_snapshot->cond_data;
1297 
1298 	arch_spin_unlock(&tr->max_lock);
1299 	local_irq_enable();
1300 
1301 	return cond_data;
1302 }
1303 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1304 
1305 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1306 					struct array_buffer *size_buf, int cpu_id);
1307 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1308 
1309 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1310 {
1311 	int order;
1312 	int ret;
1313 
1314 	if (!tr->allocated_snapshot) {
1315 
1316 		/* Make the snapshot buffer have the same order as main buffer */
1317 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1318 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1319 		if (ret < 0)
1320 			return ret;
1321 
1322 		/* allocate spare buffer */
1323 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1324 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1325 		if (ret < 0)
1326 			return ret;
1327 
1328 		tr->allocated_snapshot = true;
1329 	}
1330 
1331 	return 0;
1332 }
1333 
1334 static void free_snapshot(struct trace_array *tr)
1335 {
1336 	/*
1337 	 * We don't free the ring buffer. Instead, we resize it because
1338 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1339 	 * we want to preserve it.
1340 	 */
1341 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1342 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1343 	set_buffer_entries(&tr->max_buffer, 1);
1344 	tracing_reset_online_cpus(&tr->max_buffer);
1345 	tr->allocated_snapshot = false;
1346 }
1347 
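/*
 * Grab a reference on the snapshot facility of @tr: bump tr->snapshot
 * (refusing if the buffer is memory mapped or the count would overflow)
 * and make sure the snapshot buffer is allocated. The caller must hold
 * trace_types_lock; drop the reference with tracing_disarm_snapshot().
 */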
1348 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1349 {
1350 	int ret;
1351 
1352 	lockdep_assert_held(&trace_types_lock);
1353 
1354 	spin_lock(&tr->snapshot_trigger_lock);
1355 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1356 		spin_unlock(&tr->snapshot_trigger_lock);
1357 		return -EBUSY;
1358 	}
1359 
1360 	tr->snapshot++;
1361 	spin_unlock(&tr->snapshot_trigger_lock);
1362 
1363 	ret = tracing_alloc_snapshot_instance(tr);
1364 	if (ret) {
1365 		spin_lock(&tr->snapshot_trigger_lock);
1366 		tr->snapshot--;
1367 		spin_unlock(&tr->snapshot_trigger_lock);
1368 	}
1369 
1370 	return ret;
1371 }
1372 
1373 int tracing_arm_snapshot(struct trace_array *tr)
1374 {
1375 	int ret;
1376 
1377 	mutex_lock(&trace_types_lock);
1378 	ret = tracing_arm_snapshot_locked(tr);
1379 	mutex_unlock(&trace_types_lock);
1380 
1381 	return ret;
1382 }
1383 
1384 void tracing_disarm_snapshot(struct trace_array *tr)
1385 {
1386 	spin_lock(&tr->snapshot_trigger_lock);
1387 	if (!WARN_ON(!tr->snapshot))
1388 		tr->snapshot--;
1389 	spin_unlock(&tr->snapshot_trigger_lock);
1390 }
1391 
1392 /**
1393  * tracing_alloc_snapshot - allocate snapshot buffer.
1394  *
1395  * This only allocates the snapshot buffer if it isn't already
1396  * allocated - it doesn't also take a snapshot.
1397  *
1398  * This is meant to be used in cases where the snapshot buffer needs
1399  * to be set up for events that can't sleep but need to be able to
1400  * trigger a snapshot.
1401  */
1402 int tracing_alloc_snapshot(void)
1403 {
1404 	struct trace_array *tr = &global_trace;
1405 	int ret;
1406 
1407 	ret = tracing_alloc_snapshot_instance(tr);
1408 	WARN_ON(ret < 0);
1409 
1410 	return ret;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1413 
1414 /**
1415  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1416  *
1417  * This is similar to tracing_snapshot(), but it will allocate the
1418  * snapshot buffer if it isn't already allocated. Use this only
1419  * where it is safe to sleep, as the allocation may sleep.
1420  *
1421  * This causes a swap between the snapshot buffer and the current live
1422  * tracing buffer. You can use this to take snapshots of the live
1423  * trace when some condition is triggered, but continue to trace.
1424  */
1425 void tracing_snapshot_alloc(void)
1426 {
1427 	int ret;
1428 
1429 	ret = tracing_alloc_snapshot();
1430 	if (ret < 0)
1431 		return;
1432 
1433 	tracing_snapshot();
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1436 
1437 /**
1438  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1439  * @tr:		The tracing instance
1440  * @cond_data:	User data to associate with the snapshot
1441  * @update:	Implementation of the cond_snapshot update function
1442  *
1443  * Check whether the conditional snapshot for the given instance has
1444  * already been enabled, or if the current tracer is already using a
1445  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1446  * save the cond_data and update function inside.
1447  *
1448  * Returns 0 if successful, error otherwise.
1449  */
1450 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1451 				 cond_update_fn_t update)
1452 {
1453 	struct cond_snapshot *cond_snapshot __free(kfree) =
1454 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1455 	int ret;
1456 
1457 	if (!cond_snapshot)
1458 		return -ENOMEM;
1459 
1460 	cond_snapshot->cond_data = cond_data;
1461 	cond_snapshot->update = update;
1462 
1463 	guard(mutex)(&trace_types_lock);
1464 
1465 	if (tr->current_trace->use_max_tr)
1466 		return -EBUSY;
1467 
1468 	/*
1469 	 * Without holding the trace_types_lock, the cond_snapshot can
1470 	 * only change to NULL. We don't care if we race with it going
1471 	 * to NULL, but we want to make sure that it's not set to
1472 	 * something other than NULL when we get here, which we can
1473 	 * do safely with only holding the trace_types_lock and not
1474 	 * having to take the max_lock.
1475 	 */
1476 	if (tr->cond_snapshot)
1477 		return -EBUSY;
1478 
1479 	ret = tracing_arm_snapshot_locked(tr);
1480 	if (ret)
1481 		return ret;
1482 
1483 	local_irq_disable();
1484 	arch_spin_lock(&tr->max_lock);
1485 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1486 	arch_spin_unlock(&tr->max_lock);
1487 	local_irq_enable();
1488 
1489 	return 0;
1490 }
1491 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1492 
1493 /**
1494  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1495  * @tr:		The tracing instance
1496  *
1497  * Check whether the conditional snapshot for the given instance is
1498  * enabled; if so, free the cond_snapshot associated with it,
1499  * otherwise return -EINVAL.
1500  *
1501  * Returns 0 if successful, error otherwise.
1502  */
1503 int tracing_snapshot_cond_disable(struct trace_array *tr)
1504 {
1505 	int ret = 0;
1506 
1507 	local_irq_disable();
1508 	arch_spin_lock(&tr->max_lock);
1509 
1510 	if (!tr->cond_snapshot)
1511 		ret = -EINVAL;
1512 	else {
1513 		kfree(tr->cond_snapshot);
1514 		tr->cond_snapshot = NULL;
1515 	}
1516 
1517 	arch_spin_unlock(&tr->max_lock);
1518 	local_irq_enable();
1519 
1520 	tracing_disarm_snapshot(tr);
1521 
1522 	return ret;
1523 }
1524 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1525 #else
1526 void tracing_snapshot(void)
1527 {
1528 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1529 }
1530 EXPORT_SYMBOL_GPL(tracing_snapshot);
1531 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1532 {
1533 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1534 }
1535 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1536 int tracing_alloc_snapshot(void)
1537 {
1538 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1539 	return -ENODEV;
1540 }
1541 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1542 void tracing_snapshot_alloc(void)
1543 {
1544 	/* Give warning */
1545 	tracing_snapshot();
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1548 void *tracing_cond_snapshot_data(struct trace_array *tr)
1549 {
1550 	return NULL;
1551 }
1552 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1553 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1554 {
1555 	return -ENODEV;
1556 }
1557 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1558 int tracing_snapshot_cond_disable(struct trace_array *tr)
1559 {
1560 	return false;
1561 }
1562 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1563 #define free_snapshot(tr)	do { } while (0)
1564 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1565 #endif /* CONFIG_TRACER_SNAPSHOT */
1566 
1567 void tracer_tracing_off(struct trace_array *tr)
1568 {
1569 	if (tr->array_buffer.buffer)
1570 		ring_buffer_record_off(tr->array_buffer.buffer);
1571 	/*
1572 	 * yet, or by some tracers (like irqsoff) that just want to
1573 	 * know if the ring buffer has been disabled, but it can handle
1574 	 * races where it gets disabled but we still do a record.
1575 	 * races of where it gets disabled but we still do a record.
1576 	 * As the check is in the fast path of the tracers, it is more
1577 	 * important to be fast than accurate.
1578 	 */
1579 	tr->buffer_disabled = 1;
1580 	/* Make the flag seen by readers */
1581 	smp_wmb();
1582 }
1583 
1584 /**
1585  * tracing_off - turn off tracing buffers
1586  *
1587  * This function stops the tracing buffers from recording data.
1588  * It does not disable any overhead the tracers themselves may
1589  * be causing. This function simply causes all recording to
1590  * the ring buffers to fail.
1591  */
1592 void tracing_off(void)
1593 {
1594 	tracer_tracing_off(&global_trace);
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_off);
1597 
1598 void disable_trace_on_warning(void)
1599 {
1600 	if (__disable_trace_on_warning) {
1601 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1602 			"Disabling tracing due to warning\n");
1603 		tracing_off();
1604 	}
1605 }
1606 
1607 /**
1608  * tracer_tracing_is_on - show the real state of the ring buffer
1609  * @tr: the trace array whose ring buffer state is queried
1610  *
1611  * Shows the real state of the ring buffer: whether it is enabled or not.
1612  */
1613 bool tracer_tracing_is_on(struct trace_array *tr)
1614 {
1615 	if (tr->array_buffer.buffer)
1616 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1617 	return !tr->buffer_disabled;
1618 }
1619 
1620 /**
1621  * tracing_is_on - show state of ring buffers enabled
1622  */
1623 int tracing_is_on(void)
1624 {
1625 	return tracer_tracing_is_on(&global_trace);
1626 }
1627 EXPORT_SYMBOL_GPL(tracing_is_on);
1628 
1629 static int __init set_buf_size(char *str)
1630 {
1631 	unsigned long buf_size;
1632 
1633 	if (!str)
1634 		return 0;
1635 	buf_size = memparse(str, &str);
1636 	/*
1637 	 * nr_entries can not be zero and the startup
1638 	 * tests require some buffer space. Therefore
1639 	 * ensure we have at least 4096 bytes of buffer.
1640 	 */
1641 	trace_buf_size = max(4096UL, buf_size);
1642 	return 1;
1643 }
1644 __setup("trace_buf_size=", set_buf_size);
1645 
1646 static int __init set_tracing_thresh(char *str)
1647 {
1648 	unsigned long threshold;
1649 	int ret;
1650 
1651 	if (!str)
1652 		return 0;
1653 	ret = kstrtoul(str, 0, &threshold);
1654 	if (ret < 0)
1655 		return 0;
1656 	tracing_thresh = threshold * 1000;
1657 	return 1;
1658 }
1659 __setup("tracing_thresh=", set_tracing_thresh);
1660 
1661 unsigned long nsecs_to_usecs(unsigned long nsecs)
1662 {
1663 	return nsecs / 1000;
1664 }
1665 
1666 /*
1667  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1668  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1669  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1670  * of strings in the order that the evals (enum) were defined.
1671  */
1672 #undef C
1673 #define C(a, b) b
1674 
1675 /* These must match the bit positions in trace_iterator_flags */
1676 static const char *trace_options[] = {
1677 	TRACE_FLAGS
1678 	NULL
1679 };
1680 
1681 static struct {
1682 	u64 (*func)(void);
1683 	const char *name;
1684 	int in_ns;		/* is this clock in nanoseconds? */
1685 } trace_clocks[] = {
1686 	{ trace_clock_local,		"local",	1 },
1687 	{ trace_clock_global,		"global",	1 },
1688 	{ trace_clock_counter,		"counter",	0 },
1689 	{ trace_clock_jiffies,		"uptime",	0 },
1690 	{ trace_clock,			"perf",		1 },
1691 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1692 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1693 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1694 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1695 	ARCH_TRACE_CLOCKS
1696 };
1697 
1698 bool trace_clock_in_ns(struct trace_array *tr)
1699 {
1700 	if (trace_clocks[tr->clock_id].in_ns)
1701 		return true;
1702 
1703 	return false;
1704 }
1705 
1706 /*
1707  * trace_parser_get_init - gets the buffer for trace parser
1708  */
1709 int trace_parser_get_init(struct trace_parser *parser, int size)
1710 {
1711 	memset(parser, 0, sizeof(*parser));
1712 
1713 	parser->buffer = kmalloc(size, GFP_KERNEL);
1714 	if (!parser->buffer)
1715 		return 1;
1716 
1717 	parser->size = size;
1718 	return 0;
1719 }
1720 
1721 /*
1722  * trace_parser_put - frees the buffer for trace parser
1723  */
1724 void trace_parser_put(struct trace_parser *parser)
1725 {
1726 	kfree(parser->buffer);
1727 	parser->buffer = NULL;
1728 }
1729 
1730 /*
1731  * trace_get_user - reads the user input string separated by space
1732  * (matched by isspace(ch))
1733  *
1734  * For each string found the 'struct trace_parser' is updated,
1735  * and the function returns.
1736  *
1737  * Returns number of bytes read.
1738  *
1739  * See kernel/trace/trace.h for 'struct trace_parser' details.
1740  */
1741 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1742 	size_t cnt, loff_t *ppos)
1743 {
1744 	char ch;
1745 	size_t read = 0;
1746 	ssize_t ret;
1747 
1748 	if (!*ppos)
1749 		trace_parser_clear(parser);
1750 
1751 	ret = get_user(ch, ubuf++);
1752 	if (ret)
1753 		goto out;
1754 
1755 	read++;
1756 	cnt--;
1757 
1758 	/*
1759 	 * The parser is not finished with the last write,
1760 	 * continue reading the user input without skipping spaces.
1761 	 */
1762 	if (!parser->cont) {
1763 		/* skip white space */
1764 		while (cnt && isspace(ch)) {
1765 			ret = get_user(ch, ubuf++);
1766 			if (ret)
1767 				goto out;
1768 			read++;
1769 			cnt--;
1770 		}
1771 
1772 		parser->idx = 0;
1773 
1774 		/* only spaces were written */
1775 		if (isspace(ch) || !ch) {
1776 			*ppos += read;
1777 			ret = read;
1778 			goto out;
1779 		}
1780 	}
1781 
1782 	/* read the non-space input */
1783 	while (cnt && !isspace(ch) && ch) {
1784 		if (parser->idx < parser->size - 1)
1785 			parser->buffer[parser->idx++] = ch;
1786 		else {
1787 			ret = -EINVAL;
1788 			goto out;
1789 		}
1790 		ret = get_user(ch, ubuf++);
1791 		if (ret)
1792 			goto out;
1793 		read++;
1794 		cnt--;
1795 	}
1796 
1797 	/* We either got finished input or we have to wait for another call. */
1798 	if (isspace(ch) || !ch) {
1799 		parser->buffer[parser->idx] = 0;
1800 		parser->cont = false;
1801 	} else if (parser->idx < parser->size - 1) {
1802 		parser->cont = true;
1803 		parser->buffer[parser->idx++] = ch;
1804 		/* Make sure the parsed string always terminates with '\0'. */
1805 		parser->buffer[parser->idx] = 0;
1806 	} else {
1807 		ret = -EINVAL;
1808 		goto out;
1809 	}
1810 
1811 	*ppos += read;
1812 	ret = read;
1813 
1814 out:
1815 	return ret;
1816 }
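/*
 * Example caller pattern (see trace_pid_write() above): call
 * trace_get_user() in a loop, advancing @ubuf and decrementing @cnt by
 * the returned byte count, and act on parser->buffer whenever
 * trace_parser_loaded() reports a complete token.
 */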
1817 
1818 /* TODO add a seq_buf_to_buffer() */
1819 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1820 {
1821 	int len;
1822 
1823 	if (trace_seq_used(s) <= s->readpos)
1824 		return -EBUSY;
1825 
1826 	len = trace_seq_used(s) - s->readpos;
1827 	if (cnt > len)
1828 		cnt = len;
1829 	memcpy(buf, s->buffer + s->readpos, cnt);
1830 
1831 	s->readpos += cnt;
1832 	return cnt;
1833 }
1834 
1835 unsigned long __read_mostly	tracing_thresh;
1836 
1837 #ifdef CONFIG_TRACER_MAX_TRACE
1838 static const struct file_operations tracing_max_lat_fops;
1839 
1840 #ifdef LATENCY_FS_NOTIFY
1841 
1842 static struct workqueue_struct *fsnotify_wq;
1843 
1844 static void latency_fsnotify_workfn(struct work_struct *work)
1845 {
1846 	struct trace_array *tr = container_of(work, struct trace_array,
1847 					      fsnotify_work);
1848 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1849 }
1850 
1851 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1852 {
1853 	struct trace_array *tr = container_of(iwork, struct trace_array,
1854 					      fsnotify_irqwork);
1855 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1856 }
1857 
1858 static void trace_create_maxlat_file(struct trace_array *tr,
1859 				     struct dentry *d_tracer)
1860 {
1861 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1862 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1863 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1864 					      TRACE_MODE_WRITE,
1865 					      d_tracer, tr,
1866 					      &tracing_max_lat_fops);
1867 }
1868 
1869 __init static int latency_fsnotify_init(void)
1870 {
1871 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1872 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1873 	if (!fsnotify_wq) {
1874 		pr_err("Unable to allocate tr_max_lat_wq\n");
1875 		return -ENOMEM;
1876 	}
1877 	return 0;
1878 }
1879 
1880 late_initcall_sync(latency_fsnotify_init);
1881 
1882 void latency_fsnotify(struct trace_array *tr)
1883 {
1884 	if (!fsnotify_wq)
1885 		return;
1886 	/*
1887 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1888 	 * possible that we are called from __schedule() or do_idle(), which
1889 	 * could cause a deadlock.
1890 	 */
1891 	irq_work_queue(&tr->fsnotify_irqwork);
1892 }
1893 
1894 #else /* !LATENCY_FS_NOTIFY */
1895 
1896 #define trace_create_maxlat_file(tr, d_tracer)				\
1897 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1898 			  d_tracer, tr, &tracing_max_lat_fops)
1899 
1900 #endif
1901 
1902 /*
1903  * Copy the new maximum trace into the separate maximum-trace
1904  * structure. (this way the maximum trace is permanently saved,
1905  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1906  */
1907 static void
1908 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1909 {
1910 	struct array_buffer *trace_buf = &tr->array_buffer;
1911 	struct array_buffer *max_buf = &tr->max_buffer;
1912 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1913 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1914 
1915 	max_buf->cpu = cpu;
1916 	max_buf->time_start = data->preempt_timestamp;
1917 
1918 	max_data->saved_latency = tr->max_latency;
1919 	max_data->critical_start = data->critical_start;
1920 	max_data->critical_end = data->critical_end;
1921 
1922 	strscpy(max_data->comm, tsk->comm);
1923 	max_data->pid = tsk->pid;
1924 	/*
1925 	 * If tsk == current, then use current_uid(), as that does not use
1926 	 * RCU. The irq tracer can be called out of RCU scope.
1927 	 */
1928 	if (tsk == current)
1929 		max_data->uid = current_uid();
1930 	else
1931 		max_data->uid = task_uid(tsk);
1932 
1933 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1934 	max_data->policy = tsk->policy;
1935 	max_data->rt_priority = tsk->rt_priority;
1936 
1937 	/* record this tasks comm */
1938 	tracing_record_cmdline(tsk);
1939 	latency_fsnotify(tr);
1940 }
1941 
1942 /**
1943  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1944  * @tr: trace array to snapshot
1945  * @tsk: the task with the latency
1946  * @cpu: The cpu that initiated the trace.
1947  * @cond_data: User data associated with a conditional snapshot
1948  *
1949  * Flip the buffers between the @tr and the max_tr and record information
1950  * about which task was the cause of this latency.
1951  */
1952 void
1953 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1954 	      void *cond_data)
1955 {
1956 	if (tr->stop_count)
1957 		return;
1958 
1959 	WARN_ON_ONCE(!irqs_disabled());
1960 
1961 	if (!tr->allocated_snapshot) {
1962 		/* Only the nop tracer should hit this when disabling */
1963 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1964 		return;
1965 	}
1966 
1967 	arch_spin_lock(&tr->max_lock);
1968 
1969 	/* Inherit the recordable setting from array_buffer */
1970 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1971 		ring_buffer_record_on(tr->max_buffer.buffer);
1972 	else
1973 		ring_buffer_record_off(tr->max_buffer.buffer);
1974 
1975 #ifdef CONFIG_TRACER_SNAPSHOT
1976 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1977 		arch_spin_unlock(&tr->max_lock);
1978 		return;
1979 	}
1980 #endif
1981 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1982 
1983 	__update_max_tr(tr, tsk, cpu);
1984 
1985 	arch_spin_unlock(&tr->max_lock);
1986 
1987 	/* Any waiters on the old snapshot buffer need to wake up */
1988 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1989 }
1990 
1991 /**
1992  * update_max_tr_single - only copy one trace over, and reset the rest
1993  * @tr: trace array to snapshot
1994  * @tsk: task with the latency
1995  * @cpu: the cpu of the buffer to copy.
1996  *
1997  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1998  */
1999 void
2000 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2001 {
2002 	int ret;
2003 
2004 	if (tr->stop_count)
2005 		return;
2006 
2007 	WARN_ON_ONCE(!irqs_disabled());
2008 	if (!tr->allocated_snapshot) {
2009 		/* Only the nop tracer should hit this when disabling */
2010 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2011 		return;
2012 	}
2013 
2014 	arch_spin_lock(&tr->max_lock);
2015 
2016 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2017 
2018 	if (ret == -EBUSY) {
2019 		/*
2020 		 * We failed to swap the buffer due to a commit taking
2021 		 * place on this CPU. We fail to record, but we reset
2022 		 * the max trace buffer (no one writes directly to it)
2023 		 * and flag that it failed.
2024 		 * The swap can also fail if a buffer resize is in progress.
2025 		 */
2026 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2027 			"Failed to swap buffers due to commit or resize in progress\n");
2028 	}
2029 
2030 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2031 
2032 	__update_max_tr(tr, tsk, cpu);
2033 	arch_spin_unlock(&tr->max_lock);
2034 }
2035 
2036 #endif /* CONFIG_TRACER_MAX_TRACE */
2037 
2038 struct pipe_wait {
2039 	struct trace_iterator		*iter;
2040 	int				wait_index;
2041 };
2042 
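/*
 * Condition callback handed to ring_buffer_wait(): stop waiting once the
 * iterator's wait_index has changed (a waker bumped it) or the file that
 * owns the iterator has been closed.
 */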
2043 static bool wait_pipe_cond(void *data)
2044 {
2045 	struct pipe_wait *pwait = data;
2046 	struct trace_iterator *iter = pwait->iter;
2047 
2048 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2049 		return true;
2050 
2051 	return iter->closed;
2052 }
2053 
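/*
 * Sleep until the ring buffer behind @iter has data to read (@full is the
 * fill watermark forwarded to ring_buffer_wait()) or wait_pipe_cond()
 * says to stop. Static iterators that have their own buffer iterator
 * never wait. If this is a snapshot iterator, re-point it at the max
 * buffer afterwards in case a snapshot swapped the buffers meanwhile.
 */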
2054 static int wait_on_pipe(struct trace_iterator *iter, int full)
2055 {
2056 	struct pipe_wait pwait;
2057 	int ret;
2058 
2059 	/* Iterators are static, they should be filled or empty */
2060 	if (trace_buffer_iter(iter, iter->cpu_file))
2061 		return 0;
2062 
2063 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2064 	pwait.iter = iter;
2065 
2066 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2067 			       wait_pipe_cond, &pwait);
2068 
2069 #ifdef CONFIG_TRACER_MAX_TRACE
2070 	/*
2071 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2072 	 * to happen, this would now be the main buffer.
2073 	 */
2074 	if (iter->snapshot)
2075 		iter->array_buffer = &iter->tr->max_buffer;
2076 #endif
2077 	return ret;
2078 }
2079 
2080 #ifdef CONFIG_FTRACE_STARTUP_TEST
2081 static bool selftests_can_run;
2082 
2083 struct trace_selftests {
2084 	struct list_head		list;
2085 	struct tracer			*type;
2086 };
2087 
2088 static LIST_HEAD(postponed_selftests);
2089 
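/*
 * Remember a tracer that registered before selftests_can_run was set so
 * its selftest can be run later from init_trace_selftests().
 */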
2090 static int save_selftest(struct tracer *type)
2091 {
2092 	struct trace_selftests *selftest;
2093 
2094 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2095 	if (!selftest)
2096 		return -ENOMEM;
2097 
2098 	selftest->type = type;
2099 	list_add(&selftest->list, &postponed_selftests);
2100 	return 0;
2101 }
2102 
2103 static int run_tracer_selftest(struct tracer *type)
2104 {
2105 	struct trace_array *tr = &global_trace;
2106 	struct tracer *saved_tracer = tr->current_trace;
2107 	int ret;
2108 
2109 	if (!type->selftest || tracing_selftest_disabled)
2110 		return 0;
2111 
2112 	/*
2113 	 * If a tracer registers early in boot up (before scheduling is
2114 	 * initialized and such), then do not run its selftests yet.
2115 	 * Instead, run it a little later in the boot process.
2116 	 */
2117 	if (!selftests_can_run)
2118 		return save_selftest(type);
2119 
2120 	if (!tracing_is_on()) {
2121 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2122 			type->name);
2123 		return 0;
2124 	}
2125 
2126 	/*
2127 	 * Run a selftest on this tracer.
2128 	 * Here we reset the trace buffer, and set the current
2129 	 * tracer to be this tracer. The tracer can then run some
2130 	 * internal tracing to verify that everything is in order.
2131 	 * If we fail, we do not register this tracer.
2132 	 */
2133 	tracing_reset_online_cpus(&tr->array_buffer);
2134 
2135 	tr->current_trace = type;
2136 
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 	if (type->use_max_tr) {
2139 		/* If we expanded the buffers, make sure the max is expanded too */
2140 		if (tr->ring_buffer_expanded)
2141 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2142 					   RING_BUFFER_ALL_CPUS);
2143 		tr->allocated_snapshot = true;
2144 	}
2145 #endif
2146 
2147 	/* the test is responsible for initializing and enabling */
2148 	pr_info("Testing tracer %s: ", type->name);
2149 	ret = type->selftest(type, tr);
2150 	/* the test is responsible for resetting too */
2151 	tr->current_trace = saved_tracer;
2152 	if (ret) {
2153 		printk(KERN_CONT "FAILED!\n");
2154 		/* Add the warning after printing 'FAILED' */
2155 		WARN_ON(1);
2156 		return -1;
2157 	}
2158 	/* Only reset on passing, to avoid touching corrupted buffers */
2159 	tracing_reset_online_cpus(&tr->array_buffer);
2160 
2161 #ifdef CONFIG_TRACER_MAX_TRACE
2162 	if (type->use_max_tr) {
2163 		tr->allocated_snapshot = false;
2164 
2165 		/* Shrink the max buffer again */
2166 		if (tr->ring_buffer_expanded)
2167 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2168 					   RING_BUFFER_ALL_CPUS);
2169 	}
2170 #endif
2171 
2172 	printk(KERN_CONT "PASSED\n");
2173 	return 0;
2174 }
2175 
2176 static int do_run_tracer_selftest(struct tracer *type)
2177 {
2178 	int ret;
2179 
2180 	/*
2181 	 * Tests can take a long time, especially if they are run one after the
2182 	 * other, as does happen during bootup when all the tracers are
2183 	 * registered. This could cause the soft lockup watchdog to trigger.
2184 	 */
2185 	cond_resched();
2186 
2187 	tracing_selftest_running = true;
2188 	ret = run_tracer_selftest(type);
2189 	tracing_selftest_running = false;
2190 
2191 	return ret;
2192 }
2193 
2194 static __init int init_trace_selftests(void)
2195 {
2196 	struct trace_selftests *p, *n;
2197 	struct tracer *t, **last;
2198 	int ret;
2199 
2200 	selftests_can_run = true;
2201 
2202 	guard(mutex)(&trace_types_lock);
2203 
2204 	if (list_empty(&postponed_selftests))
2205 		return 0;
2206 
2207 	pr_info("Running postponed tracer tests:\n");
2208 
2209 	tracing_selftest_running = true;
2210 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2211 		/* This loop can take minutes when sanitizers are enabled, so
2212 		 * let's make sure we allow RCU processing.
2213 		 */
2214 		cond_resched();
2215 		ret = run_tracer_selftest(p->type);
2216 		/* If the test fails, then warn and remove from available_tracers */
2217 		if (ret < 0) {
2218 			WARN(1, "tracer: %s failed selftest, disabling\n",
2219 			     p->type->name);
2220 			last = &trace_types;
2221 			for (t = trace_types; t; t = t->next) {
2222 				if (t == p->type) {
2223 					*last = t->next;
2224 					break;
2225 				}
2226 				last = &t->next;
2227 			}
2228 		}
2229 		list_del(&p->list);
2230 		kfree(p);
2231 	}
2232 	tracing_selftest_running = false;
2233 
2234 	return 0;
2235 }
2236 core_initcall(init_trace_selftests);
2237 #else
2238 static inline int do_run_tracer_selftest(struct tracer *type)
2239 {
2240 	return 0;
2241 }
2242 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2243 
2244 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2245 
2246 static void __init apply_trace_boot_options(void);
2247 
2248 /**
2249  * register_tracer - register a tracer with the ftrace system.
2250  * @type: the plugin for the tracer
2251  *
2252  * Register a new plugin tracer.
2253  */
2254 int __init register_tracer(struct tracer *type)
2255 {
2256 	struct tracer *t;
2257 	int ret = 0;
2258 
2259 	if (!type->name) {
2260 		pr_info("Tracer must have a name\n");
2261 		return -1;
2262 	}
2263 
2264 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2265 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2266 		return -1;
2267 	}
2268 
2269 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2270 		pr_warn("Can not register tracer %s due to lockdown\n",
2271 			   type->name);
2272 		return -EPERM;
2273 	}
2274 
2275 	mutex_lock(&trace_types_lock);
2276 
2277 	for (t = trace_types; t; t = t->next) {
2278 		if (strcmp(type->name, t->name) == 0) {
2279 			/* already found */
2280 			pr_info("Tracer %s already registered\n",
2281 				type->name);
2282 			ret = -1;
2283 			goto out;
2284 		}
2285 	}
2286 
2287 	if (!type->set_flag)
2288 		type->set_flag = &dummy_set_flag;
2289 	if (!type->flags) {
2290 		/* allocate a dummy tracer_flags */
2291 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2292 		if (!type->flags) {
2293 			ret = -ENOMEM;
2294 			goto out;
2295 		}
2296 		type->flags->val = 0;
2297 		type->flags->opts = dummy_tracer_opt;
2298 	} else
2299 		if (!type->flags->opts)
2300 			type->flags->opts = dummy_tracer_opt;
2301 
2302 	/* store the tracer for __set_tracer_option */
2303 	type->flags->trace = type;
2304 
2305 	ret = do_run_tracer_selftest(type);
2306 	if (ret < 0)
2307 		goto out;
2308 
2309 	type->next = trace_types;
2310 	trace_types = type;
2311 	add_tracer_options(&global_trace, type);
2312 
2313  out:
2314 	mutex_unlock(&trace_types_lock);
2315 
2316 	if (ret || !default_bootup_tracer)
2317 		goto out_unlock;
2318 
2319 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2320 		goto out_unlock;
2321 
2322 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2323 	/* Do we want this tracer to start on bootup? */
2324 	tracing_set_tracer(&global_trace, type->name);
2325 	default_bootup_tracer = NULL;
2326 
2327 	apply_trace_boot_options();
2328 
2329 	/* disable other selftests, since they would conflict with the running tracer. */
2330 	disable_tracing_selftest("running a tracer");
2331 
2332  out_unlock:
2333 	return ret;
2334 }
2335 
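/*
 * Clear the ring buffer of a single CPU in @buf. Recording is disabled
 * and all in-flight commits are allowed to finish (synchronize_rcu())
 * before the per-CPU buffer is reset.
 */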
2336 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2337 {
2338 	struct trace_buffer *buffer = buf->buffer;
2339 
2340 	if (!buffer)
2341 		return;
2342 
2343 	ring_buffer_record_disable(buffer);
2344 
2345 	/* Make sure all commits have finished */
2346 	synchronize_rcu();
2347 	ring_buffer_reset_cpu(buffer, cpu);
2348 
2349 	ring_buffer_record_enable(buffer);
2350 }
2351 
2352 void tracing_reset_online_cpus(struct array_buffer *buf)
2353 {
2354 	struct trace_buffer *buffer = buf->buffer;
2355 
2356 	if (!buffer)
2357 		return;
2358 
2359 	ring_buffer_record_disable(buffer);
2360 
2361 	/* Make sure all commits have finished */
2362 	synchronize_rcu();
2363 
2364 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2365 
2366 	ring_buffer_reset_online_cpus(buffer);
2367 
2368 	ring_buffer_record_enable(buffer);
2369 }
2370 
2371 static void tracing_reset_all_cpus(struct array_buffer *buf)
2372 {
2373 	struct trace_buffer *buffer = buf->buffer;
2374 
2375 	if (!buffer)
2376 		return;
2377 
2378 	ring_buffer_record_disable(buffer);
2379 
2380 	/* Make sure all commits have finished */
2381 	synchronize_rcu();
2382 
2383 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2384 
2385 	ring_buffer_reset(buffer);
2386 
2387 	ring_buffer_record_enable(buffer);
2388 }
2389 
2390 /* Must have trace_types_lock held */
2391 void tracing_reset_all_online_cpus_unlocked(void)
2392 {
2393 	struct trace_array *tr;
2394 
2395 	lockdep_assert_held(&trace_types_lock);
2396 
2397 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2398 		if (!tr->clear_trace)
2399 			continue;
2400 		tr->clear_trace = false;
2401 		tracing_reset_online_cpus(&tr->array_buffer);
2402 #ifdef CONFIG_TRACER_MAX_TRACE
2403 		tracing_reset_online_cpus(&tr->max_buffer);
2404 #endif
2405 	}
2406 }
2407 
2408 void tracing_reset_all_online_cpus(void)
2409 {
2410 	mutex_lock(&trace_types_lock);
2411 	tracing_reset_all_online_cpus_unlocked();
2412 	mutex_unlock(&trace_types_lock);
2413 }
2414 
2415 int is_tracing_stopped(void)
2416 {
2417 	return global_trace.stop_count;
2418 }
2419 
2420 static void tracing_start_tr(struct trace_array *tr)
2421 {
2422 	struct trace_buffer *buffer;
2423 	unsigned long flags;
2424 
2425 	if (tracing_disabled)
2426 		return;
2427 
2428 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2429 	if (--tr->stop_count) {
2430 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2431 			/* Someone screwed up their debugging */
2432 			tr->stop_count = 0;
2433 		}
2434 		goto out;
2435 	}
2436 
2437 	/* Prevent the buffers from switching */
2438 	arch_spin_lock(&tr->max_lock);
2439 
2440 	buffer = tr->array_buffer.buffer;
2441 	if (buffer)
2442 		ring_buffer_record_enable(buffer);
2443 
2444 #ifdef CONFIG_TRACER_MAX_TRACE
2445 	buffer = tr->max_buffer.buffer;
2446 	if (buffer)
2447 		ring_buffer_record_enable(buffer);
2448 #endif
2449 
2450 	arch_spin_unlock(&tr->max_lock);
2451 
2452  out:
2453 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2454 }
2455 
2456 /**
2457  * tracing_start - quick start of the tracer
2458  *
2459  * If tracing is enabled but was stopped by tracing_stop,
2460  * this will start the tracer back up.
2461  */
2462 void tracing_start(void)
2464 {
2465 	return tracing_start_tr(&global_trace);
2466 }
2467 
2468 static void tracing_stop_tr(struct trace_array *tr)
2469 {
2470 	struct trace_buffer *buffer;
2471 	unsigned long flags;
2472 
2473 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2474 	if (tr->stop_count++)
2475 		goto out;
2476 
2477 	/* Prevent the buffers from switching */
2478 	arch_spin_lock(&tr->max_lock);
2479 
2480 	buffer = tr->array_buffer.buffer;
2481 	if (buffer)
2482 		ring_buffer_record_disable(buffer);
2483 
2484 #ifdef CONFIG_TRACER_MAX_TRACE
2485 	buffer = tr->max_buffer.buffer;
2486 	if (buffer)
2487 		ring_buffer_record_disable(buffer);
2488 #endif
2489 
2490 	arch_spin_unlock(&tr->max_lock);
2491 
2492  out:
2493 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2494 }
2495 
2496 /**
2497  * tracing_stop - quick stop of the tracer
2498  *
2499  * Light weight way to stop tracing. Use in conjunction with
2500  * tracing_start.
2501  */
2502 void tracing_stop(void)
2503 {
2504 	return tracing_stop_tr(&global_trace);
2505 }
2506 
2507 /*
2508  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2509  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2510  * simplifies those functions and keeps them in sync.
2511  */
2512 enum print_line_t trace_handle_return(struct trace_seq *s)
2513 {
2514 	return trace_seq_has_overflowed(s) ?
2515 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2516 }
2517 EXPORT_SYMBOL_GPL(trace_handle_return);
2518 
2519 static unsigned short migration_disable_value(void)
2520 {
2521 #if defined(CONFIG_SMP)
2522 	return current->migration_disabled;
2523 #else
2524 	return 0;
2525 #endif
2526 }
2527 
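/*
 * Build the packed trace_ctx word: context flags (NMI, hardirq, softirq,
 * BH-off, resched hints) in the upper 16 bits, the preemption count
 * capped at 0xf in the low nibble, and the migration-disable depth
 * capped at 0xf in the next nibble.
 */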
2528 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2529 {
2530 	unsigned int trace_flags = irqs_status;
2531 	unsigned int pc;
2532 
2533 	pc = preempt_count();
2534 
2535 	if (pc & NMI_MASK)
2536 		trace_flags |= TRACE_FLAG_NMI;
2537 	if (pc & HARDIRQ_MASK)
2538 		trace_flags |= TRACE_FLAG_HARDIRQ;
2539 	if (in_serving_softirq())
2540 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2541 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2542 		trace_flags |= TRACE_FLAG_BH_OFF;
2543 
2544 	if (tif_need_resched())
2545 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2546 	if (test_preempt_need_resched())
2547 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2548 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2549 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2550 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2551 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2552 }
2553 
2554 struct ring_buffer_event *
2555 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2556 			  int type,
2557 			  unsigned long len,
2558 			  unsigned int trace_ctx)
2559 {
2560 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2561 }
2562 
2563 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2564 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2565 static int trace_buffered_event_ref;
2566 
2567 /**
2568  * trace_buffered_event_enable - enable buffering events
2569  *
2570  * When events are being filtered, it is quicker to use a temporary
2571  * buffer to write the event data into if there's a likely chance
2572  * that it will not be committed. Discarding an event from the ring
2573  * buffer is slower than committing it, and much slower than copying
2574  * into a temporary buffer and committing only on a match.
2575  *
2576  * When an event is to be filtered, allocate per-CPU buffers to write
2577  * the event data into. If the event is filtered out, it is simply
2578  * dropped; otherwise the entire data is committed to the ring buffer
2579  * in one shot.
2580  */
2581 void trace_buffered_event_enable(void)
2582 {
2583 	struct ring_buffer_event *event;
2584 	struct page *page;
2585 	int cpu;
2586 
2587 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2588 
2589 	if (trace_buffered_event_ref++)
2590 		return;
2591 
2592 	for_each_tracing_cpu(cpu) {
2593 		page = alloc_pages_node(cpu_to_node(cpu),
2594 					GFP_KERNEL | __GFP_NORETRY, 0);
2595 		/* This is just an optimization and can handle failures */
2596 		if (!page) {
2597 			pr_err("Failed to allocate event buffer\n");
2598 			break;
2599 		}
2600 
2601 		event = page_address(page);
2602 		memset(event, 0, sizeof(*event));
2603 
2604 		per_cpu(trace_buffered_event, cpu) = event;
2605 
2606 		preempt_disable();
2607 		if (cpu == smp_processor_id() &&
2608 		    __this_cpu_read(trace_buffered_event) !=
2609 		    per_cpu(trace_buffered_event, cpu))
2610 			WARN_ON_ONCE(1);
2611 		preempt_enable();
2612 	}
2613 }
2614 
2615 static void enable_trace_buffered_event(void *data)
2616 {
2617 	/* Probably not needed, but do it anyway */
2618 	smp_rmb();
2619 	this_cpu_dec(trace_buffered_event_cnt);
2620 }
2621 
2622 static void disable_trace_buffered_event(void *data)
2623 {
2624 	this_cpu_inc(trace_buffered_event_cnt);
2625 }
2626 
2627 /**
2628  * trace_buffered_event_disable - disable buffering events
2629  *
2630  * When a filter is removed, it is faster to not use the buffered
2631  * events, and to commit directly into the ring buffer. Free up
2632  * the temp buffers when there are no more users. This requires
2633  * special synchronization with current events.
2634  */
2635 void trace_buffered_event_disable(void)
2636 {
2637 	int cpu;
2638 
2639 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2640 
2641 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2642 		return;
2643 
2644 	if (--trace_buffered_event_ref)
2645 		return;
2646 
2647 	/* For each CPU, set the buffer as used. */
2648 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2649 			 NULL, true);
2650 
2651 	/* Wait for all current users to finish */
2652 	synchronize_rcu();
2653 
2654 	for_each_tracing_cpu(cpu) {
2655 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2656 		per_cpu(trace_buffered_event, cpu) = NULL;
2657 	}
2658 
2659 	/*
2660 	 * Wait for all CPUs that may have started checking whether they can
2661 	 * use their event buffer only after the previous synchronize_rcu()
2662 	 * call and that still read a valid pointer from trace_buffered_event.
2663 	 * They must not see the cleared trace_buffered_event_cnt, or they
2664 	 * could wrongly decide to use the pointed-to buffer, which is now freed.
2665 	 */
2666 	synchronize_rcu();
2667 
2668 	/* For each CPU, relinquish the buffer */
2669 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2670 			 true);
2671 }
2672 
2673 static struct trace_buffer *temp_buffer;
2674 
2675 struct ring_buffer_event *
2676 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2677 			  struct trace_event_file *trace_file,
2678 			  int type, unsigned long len,
2679 			  unsigned int trace_ctx)
2680 {
2681 	struct ring_buffer_event *entry;
2682 	struct trace_array *tr = trace_file->tr;
2683 	int val;
2684 
2685 	*current_rb = tr->array_buffer.buffer;
2686 
2687 	if (!tr->no_filter_buffering_ref &&
2688 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2689 		preempt_disable_notrace();
2690 		/*
2691 		 * Filtering is on, so try to use the per cpu buffer first.
2692 		 * This buffer will simulate a ring_buffer_event,
2693 		 * where the type_len is zero and the array[0] will
2694 		 * hold the full length.
2695 		 * (see include/linux/ring_buffer.h for details on
2696 		 *  how the ring_buffer_event is structured).
2697 		 *
2698 		 * Using a temp buffer during filtering and copying it
2699 		 * on a matched filter is quicker than writing directly
2700 		 * into the ring buffer and then discarding it when
2701 		 * it doesn't match. That is because the discard
2702 		 * requires several atomic operations to get right.
2703 		 * Copying on a match and doing nothing on a failed match
2704 		 * is still quicker than skipping the copy on a match but
2705 		 * having to discard from the ring buffer on a failed match.
2706 		 */
2707 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2708 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2709 
2710 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2711 
2712 			/*
2713 			 * Preemption is disabled, but interrupts and NMIs
2714 			 * can still come in now. If that happens after
2715 			 * the above increment, then it will have to go
2716 			 * back to the old method of allocating the event
2717 			 * on the ring buffer, and if the filter fails, it
2718 			 * will have to call ring_buffer_discard_commit()
2719 			 * to remove it.
2720 			 *
2721 			 * Need to also check the unlikely case that the
2722 			 * length is bigger than the temp buffer size.
2723 			 * If that happens, then the reserve is pretty much
2724 			 * guaranteed to fail, as the ring buffer currently
2725 			 * only allows events less than a page. But that may
2726 			 * change in the future, so let the ring buffer reserve
2727 			 * handle the failure in that case.
2728 			 */
2729 			if (val == 1 && likely(len <= max_len)) {
2730 				trace_event_setup(entry, type, trace_ctx);
2731 				entry->array[0] = len;
2732 				/* Return with preemption disabled */
2733 				return entry;
2734 			}
2735 			this_cpu_dec(trace_buffered_event_cnt);
2736 		}
2737 		/* __trace_buffer_lock_reserve() disables preemption */
2738 		preempt_enable_notrace();
2739 	}
2740 
2741 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2742 					    trace_ctx);
2743 	/*
2744 	 * If tracing is off, but we have triggers enabled
2745 	 * we still need to look at the event data. Use the temp_buffer
2746 	 * to store the trace event for the trigger to use. It's recursion
2747 	 * safe and will not be recorded anywhere.
2748 	 */
2749 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2750 		*current_rb = temp_buffer;
2751 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2752 						    trace_ctx);
2753 	}
2754 	return entry;
2755 }
2756 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2757 
2758 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2759 static DEFINE_MUTEX(tracepoint_printk_mutex);
2760 
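/*
 * Mirror a trace event to printk (used when tp_printk is enabled).
 * Soft-disabled or filtered-out events are skipped; otherwise the event
 * is formatted through its trace() callback into the shared
 * tracepoint_print_iter under tracepoint_iter_lock and printed.
 */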
2761 static void output_printk(struct trace_event_buffer *fbuffer)
2762 {
2763 	struct trace_event_call *event_call;
2764 	struct trace_event_file *file;
2765 	struct trace_event *event;
2766 	unsigned long flags;
2767 	struct trace_iterator *iter = tracepoint_print_iter;
2768 
2769 	/* We should never get here if iter is NULL */
2770 	if (WARN_ON_ONCE(!iter))
2771 		return;
2772 
2773 	event_call = fbuffer->trace_file->event_call;
2774 	if (!event_call || !event_call->event.funcs ||
2775 	    !event_call->event.funcs->trace)
2776 		return;
2777 
2778 	file = fbuffer->trace_file;
2779 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2780 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2781 	     !filter_match_preds(file->filter, fbuffer->entry)))
2782 		return;
2783 
2784 	event = &fbuffer->trace_file->event_call->event;
2785 
2786 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2787 	trace_seq_init(&iter->seq);
2788 	iter->ent = fbuffer->entry;
2789 	event_call->event.funcs->trace(iter, 0, event);
2790 	trace_seq_putc(&iter->seq, 0);
2791 	printk("%s", iter->seq.buffer);
2792 
2793 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2794 }
2795 
2796 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2797 			     void *buffer, size_t *lenp,
2798 			     loff_t *ppos)
2799 {
2800 	int save_tracepoint_printk;
2801 	int ret;
2802 
2803 	guard(mutex)(&tracepoint_printk_mutex);
2804 	save_tracepoint_printk = tracepoint_printk;
2805 
2806 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2807 
2808 	/*
2809 	 * This will force exiting early, as tracepoint_printk
2810 	 * is always zero when tracepoint_print_iter is not allocated.
2811 	 */
2812 	if (!tracepoint_print_iter)
2813 		tracepoint_printk = 0;
2814 
2815 	if (save_tracepoint_printk == tracepoint_printk)
2816 		return ret;
2817 
2818 	if (tracepoint_printk)
2819 		static_key_enable(&tracepoint_printk_key.key);
2820 	else
2821 		static_key_disable(&tracepoint_printk_key.key);
2822 
2823 	return ret;
2824 }
2825 
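/*
 * Commit an event that was reserved through the trace_event_buffer.
 * Event triggers may discard the event; otherwise it is optionally
 * echoed to printk (tp_printk) and to registered trace exports, and
 * then committed to the ring buffer (with stack/userstack traces if
 * those options are enabled). Post-call triggers run last either way.
 */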
2826 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2827 {
2828 	enum event_trigger_type tt = ETT_NONE;
2829 	struct trace_event_file *file = fbuffer->trace_file;
2830 
2831 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2832 			fbuffer->entry, &tt))
2833 		goto discard;
2834 
2835 	if (static_key_false(&tracepoint_printk_key.key))
2836 		output_printk(fbuffer);
2837 
2838 	if (static_branch_unlikely(&trace_event_exports_enabled))
2839 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2840 
2841 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2842 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2843 
2844 discard:
2845 	if (tt)
2846 		event_triggers_post_call(file, tt);
2847 
2848 }
2849 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2850 
2851 /*
2852  * Skip 3:
2853  *
2854  *   trace_buffer_unlock_commit_regs()
2855  *   trace_event_buffer_commit()
2856  *   trace_event_raw_event_xxx()
2857  */
2858 # define STACK_SKIP 3
2859 
2860 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2861 				     struct trace_buffer *buffer,
2862 				     struct ring_buffer_event *event,
2863 				     unsigned int trace_ctx,
2864 				     struct pt_regs *regs)
2865 {
2866 	__buffer_unlock_commit(buffer, event);
2867 
2868 	/*
2869 	 * If regs is not set, then skip the necessary functions.
2870 	 * Note, we can still get here via blktrace, wakeup tracer
2871 	 * and mmiotrace, but that's ok if they lose a function or
2872 	 * two. They are not that meaningful.
2873 	 */
2874 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2875 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2876 }
2877 
2878 /*
2879  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2880  */
2881 void
2882 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2883 				   struct ring_buffer_event *event)
2884 {
2885 	__buffer_unlock_commit(buffer, event);
2886 }
2887 
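/*
 * Record a TRACE_FN entry for @ip/@parent_ip in @tr's ring buffer. When
 * @fregs is provided (and the architecture supports argument access),
 * the traced function's register arguments are stored in the entry as
 * well. The event is also handed to the function exports when enabled.
 */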
2888 void
2889 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2890 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2891 {
2892 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2893 	struct ring_buffer_event *event;
2894 	struct ftrace_entry *entry;
2895 	int size = sizeof(*entry);
2896 
2897 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2898 
2899 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2900 					    trace_ctx);
2901 	if (!event)
2902 		return;
2903 	entry	= ring_buffer_event_data(event);
2904 	entry->ip			= ip;
2905 	entry->parent_ip		= parent_ip;
2906 
2907 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2908 	if (fregs) {
2909 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2910 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2911 	}
2912 #endif
2913 
2914 	if (static_branch_unlikely(&trace_function_exports_enabled))
2915 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2916 	__buffer_unlock_commit(buffer, event);
2917 }
2918 
2919 #ifdef CONFIG_STACKTRACE
2920 
2921 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2922 #define FTRACE_KSTACK_NESTING	4
2923 
2924 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2925 
2926 struct ftrace_stack {
2927 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2928 };
2929 
2930 
2931 struct ftrace_stacks {
2932 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2933 };
2934 
2935 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2936 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2937 
2938 static void __ftrace_trace_stack(struct trace_array *tr,
2939 				 struct trace_buffer *buffer,
2940 				 unsigned int trace_ctx,
2941 				 int skip, struct pt_regs *regs)
2942 {
2943 	struct ring_buffer_event *event;
2944 	unsigned int size, nr_entries;
2945 	struct ftrace_stack *fstack;
2946 	struct stack_entry *entry;
2947 	int stackidx;
2948 
2949 	/*
2950 	 * Add one, for this function and the call to stack_trace_save().
2951 	 * If regs is set, then these functions will not be in the way.
2952 	 */
2953 #ifndef CONFIG_UNWINDER_ORC
2954 	if (!regs)
2955 		skip++;
2956 #endif
2957 
2958 	preempt_disable_notrace();
2959 
2960 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2961 
2962 	/* This should never happen. If it does, yell once and skip */
2963 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2964 		goto out;
2965 
2966 	/*
2967 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2968 	 * interrupt will either see the value pre increment or post
2969 	 * increment. If the interrupt happens pre increment it will have
2970 	 * restored the counter when it returns.  We just need a barrier to
2971 	 * keep gcc from moving things around.
2972 	 */
2973 	barrier();
2974 
2975 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2976 	size = ARRAY_SIZE(fstack->calls);
2977 
2978 	if (regs) {
2979 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2980 						   size, skip);
2981 	} else {
2982 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2983 	}
2984 
2985 #ifdef CONFIG_DYNAMIC_FTRACE
2986 	/* Mark entry of stack trace as trampoline code */
2987 	if (tr->ops && tr->ops->trampoline) {
2988 		unsigned long tramp_start = tr->ops->trampoline;
2989 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2990 		unsigned long *calls = fstack->calls;
2991 
2992 		for (int i = 0; i < nr_entries; i++) {
2993 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2994 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2995 		}
2996 	}
2997 #endif
2998 
2999 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3000 				    struct_size(entry, caller, nr_entries),
3001 				    trace_ctx);
3002 	if (!event)
3003 		goto out;
3004 	entry = ring_buffer_event_data(event);
3005 
3006 	entry->size = nr_entries;
3007 	memcpy(&entry->caller, fstack->calls,
3008 	       flex_array_size(entry, caller, nr_entries));
3009 
3010 	__buffer_unlock_commit(buffer, event);
3011 
3012  out:
3013 	/* Again, don't let gcc optimize things here */
3014 	barrier();
3015 	__this_cpu_dec(ftrace_stack_reserve);
3016 	preempt_enable_notrace();
3017 
3018 }
3019 
3020 static inline void ftrace_trace_stack(struct trace_array *tr,
3021 				      struct trace_buffer *buffer,
3022 				      unsigned int trace_ctx,
3023 				      int skip, struct pt_regs *regs)
3024 {
3025 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3026 		return;
3027 
3028 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3029 }
3030 
3031 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3032 		   int skip)
3033 {
3034 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3035 
3036 	if (rcu_is_watching()) {
3037 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3038 		return;
3039 	}
3040 
3041 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3042 		return;
3043 
3044 	/*
3045 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3046 	 * but if the above rcu_is_watching() failed, then the NMI
3047 	 * triggered someplace critical, and ct_irq_enter() should
3048 	 * not be called from NMI.
3049 	 */
3050 	if (unlikely(in_nmi()))
3051 		return;
3052 
3053 	ct_irq_enter_irqson();
3054 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3055 	ct_irq_exit_irqson();
3056 }
3057 
3058 /**
3059  * trace_dump_stack - record a stack back trace in the trace buffer
3060  * @skip: Number of functions to skip (helper handlers)
3061  */
3062 void trace_dump_stack(int skip)
3063 {
3064 	if (tracing_disabled || tracing_selftest_running)
3065 		return;
3066 
3067 #ifndef CONFIG_UNWINDER_ORC
3068 	/* Skip 1 to skip this function. */
3069 	skip++;
3070 #endif
3071 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3072 				tracing_gen_ctx(), skip, NULL);
3073 }
3074 EXPORT_SYMBOL_GPL(trace_dump_stack);
3075 
3076 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3077 static DEFINE_PER_CPU(int, user_stack_count);
3078 
3079 static void
3080 ftrace_trace_userstack(struct trace_array *tr,
3081 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3082 {
3083 	struct ring_buffer_event *event;
3084 	struct userstack_entry *entry;
3085 
3086 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087 		return;
3088 
3089 	/*
3090 	 * NMIs can not handle page faults, even with fixups.
3091 	 * Saving the user stack can (and often does) fault.
3092 	 */
3093 	if (unlikely(in_nmi()))
3094 		return;
3095 
3096 	/*
3097 	 * prevent recursion, since the user stack tracing may
3098 	 * trigger other kernel events.
3099 	 */
3100 	preempt_disable();
3101 	if (__this_cpu_read(user_stack_count))
3102 		goto out;
3103 
3104 	__this_cpu_inc(user_stack_count);
3105 
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107 					    sizeof(*entry), trace_ctx);
3108 	if (!event)
3109 		goto out_drop_count;
3110 	entry	= ring_buffer_event_data(event);
3111 
3112 	entry->tgid		= current->tgid;
3113 	memset(&entry->caller, 0, sizeof(entry->caller));
3114 
3115 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116 	__buffer_unlock_commit(buffer, event);
3117 
3118  out_drop_count:
3119 	__this_cpu_dec(user_stack_count);
3120  out:
3121 	preempt_enable();
3122 }
3123 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3124 static void ftrace_trace_userstack(struct trace_array *tr,
3125 				   struct trace_buffer *buffer,
3126 				   unsigned int trace_ctx)
3127 {
3128 }
3129 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3130 
3131 #endif /* CONFIG_STACKTRACE */
3132 
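/*
 * Store a 64-bit timestamp delta as the 32-bit bottom/top halves kept in
 * a func_repeats entry.
 */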
3133 static inline void
3134 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3135 			  unsigned long long delta)
3136 {
3137 	entry->bottom_delta_ts = delta & U32_MAX;
3138 	entry->top_delta_ts = (delta >> 32);
3139 }
3140 
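/*
 * Emit a TRACE_FUNC_REPEATS entry summarizing @last_info: the repeated
 * ip/parent_ip, how many times it repeated, and the time elapsed since
 * the last recorded call of that function.
 */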
3141 void trace_last_func_repeats(struct trace_array *tr,
3142 			     struct trace_func_repeats *last_info,
3143 			     unsigned int trace_ctx)
3144 {
3145 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3146 	struct func_repeats_entry *entry;
3147 	struct ring_buffer_event *event;
3148 	u64 delta;
3149 
3150 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3151 					    sizeof(*entry), trace_ctx);
3152 	if (!event)
3153 		return;
3154 
3155 	delta = ring_buffer_event_time_stamp(buffer, event) -
3156 		last_info->ts_last_call;
3157 
3158 	entry = ring_buffer_event_data(event);
3159 	entry->ip = last_info->ip;
3160 	entry->parent_ip = last_info->parent_ip;
3161 	entry->count = last_info->count;
3162 	func_repeats_set_delta_ts(entry, delta);
3163 
3164 	__buffer_unlock_commit(buffer, event);
3165 }
3166 
3167 /* created for use with alloc_percpu */
3168 struct trace_buffer_struct {
3169 	int nesting;
3170 	char buffer[4][TRACE_BUF_SIZE];
3171 };
3172 
3173 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3174 
3175 /*
3176  * This allows for lockless recording.  If we're nested too deeply, then
3177  * this returns NULL.
3178  */
3179 static char *get_trace_buf(void)
3180 {
3181 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3182 
3183 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3184 		return NULL;
3185 
3186 	buffer->nesting++;
3187 
3188 	/* Interrupts must see nesting incremented before we use the buffer */
3189 	barrier();
3190 	return &buffer->buffer[buffer->nesting - 1][0];
3191 }
3192 
3193 static void put_trace_buf(void)
3194 {
3195 	/* Don't let the decrement of nesting leak before this */
3196 	barrier();
3197 	this_cpu_dec(trace_percpu_buffer->nesting);
3198 }
3199 
3200 static int alloc_percpu_trace_buffer(void)
3201 {
3202 	struct trace_buffer_struct __percpu *buffers;
3203 
3204 	if (trace_percpu_buffer)
3205 		return 0;
3206 
3207 	buffers = alloc_percpu(struct trace_buffer_struct);
3208 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3209 		return -ENOMEM;
3210 
3211 	trace_percpu_buffer = buffers;
3212 	return 0;
3213 }
3214 
3215 static int buffers_allocated;
3216 
3217 void trace_printk_init_buffers(void)
3218 {
3219 	if (buffers_allocated)
3220 		return;
3221 
3222 	if (alloc_percpu_trace_buffer())
3223 		return;
3224 
3225 	/* trace_printk() is for debug use only. Don't use it in production. */
3226 
3227 	pr_warn("\n");
3228 	pr_warn("**********************************************************\n");
3229 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3230 	pr_warn("**                                                      **\n");
3231 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3232 	pr_warn("**                                                      **\n");
3233 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3234 	pr_warn("** unsafe for production use.                           **\n");
3235 	pr_warn("**                                                      **\n");
3236 	pr_warn("** If you see this message and you are not debugging    **\n");
3237 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3238 	pr_warn("**                                                      **\n");
3239 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3240 	pr_warn("**********************************************************\n");
3241 
3242 	/* Expand the buffers to set size */
3243 	tracing_update_buffers(&global_trace);
3244 
3245 	buffers_allocated = 1;
3246 
3247 	/*
3248 	 * trace_printk_init_buffers() can be called by modules.
3249 	 * If that happens, then we need to start cmdline recording
3250 	 * directly here. If the global_trace.buffer is already
3251 	 * allocated here, then this was called by module code.
3252 	 */
3253 	if (global_trace.array_buffer.buffer)
3254 		tracing_start_cmdline_record();
3255 }
3256 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3257 
3258 void trace_printk_start_comm(void)
3259 {
3260 	/* Start tracing comms if trace printk is set */
3261 	if (!buffers_allocated)
3262 		return;
3263 	tracing_start_cmdline_record();
3264 }
3265 
3266 static void trace_printk_start_stop_comm(int enabled)
3267 {
3268 	if (!buffers_allocated)
3269 		return;
3270 
3271 	if (enabled)
3272 		tracing_start_cmdline_record();
3273 	else
3274 		tracing_stop_cmdline_record();
3275 }
3276 
3277 /**
3278  * trace_vbprintk - write binary msg to tracing buffer
3279  * @ip:    The address of the caller
3280  * @fmt:   The string format to write to the buffer
3281  * @args:  Arguments for @fmt
3282  */
3283 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3284 {
3285 	struct ring_buffer_event *event;
3286 	struct trace_buffer *buffer;
3287 	struct trace_array *tr = READ_ONCE(printk_trace);
3288 	struct bprint_entry *entry;
3289 	unsigned int trace_ctx;
3290 	char *tbuffer;
3291 	int len = 0, size;
3292 
3293 	if (!printk_binsafe(tr))
3294 		return trace_vprintk(ip, fmt, args);
3295 
3296 	if (unlikely(tracing_selftest_running || tracing_disabled))
3297 		return 0;
3298 
3299 	/* Don't pollute graph traces with trace_vprintk internals */
3300 	pause_graph_tracing();
3301 
3302 	trace_ctx = tracing_gen_ctx();
3303 	preempt_disable_notrace();
3304 
3305 	tbuffer = get_trace_buf();
3306 	if (!tbuffer) {
3307 		len = 0;
3308 		goto out_nobuffer;
3309 	}
3310 
3311 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3312 
3313 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3314 		goto out_put;
3315 
3316 	size = sizeof(*entry) + sizeof(u32) * len;
3317 	buffer = tr->array_buffer.buffer;
3318 	ring_buffer_nest_start(buffer);
3319 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3320 					    trace_ctx);
3321 	if (!event)
3322 		goto out;
3323 	entry = ring_buffer_event_data(event);
3324 	entry->ip			= ip;
3325 	entry->fmt			= fmt;
3326 
3327 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3328 	__buffer_unlock_commit(buffer, event);
3329 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3330 
3331 out:
3332 	ring_buffer_nest_end(buffer);
3333 out_put:
3334 	put_trace_buf();
3335 
3336 out_nobuffer:
3337 	preempt_enable_notrace();
3338 	unpause_graph_tracing();
3339 
3340 	return len;
3341 }
3342 EXPORT_SYMBOL_GPL(trace_vbprintk);
3343 
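/*
 * Common helper for trace_array_vprintk() and trace_array_printk_buf():
 * format the message into a per-CPU scratch buffer and write it into
 * @buffer as a TRACE_PRINT entry, optionally followed by a stack trace.
 */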
3344 __printf(3, 0)
3345 static int
3346 __trace_array_vprintk(struct trace_buffer *buffer,
3347 		      unsigned long ip, const char *fmt, va_list args)
3348 {
3349 	struct ring_buffer_event *event;
3350 	int len = 0, size;
3351 	struct print_entry *entry;
3352 	unsigned int trace_ctx;
3353 	char *tbuffer;
3354 
3355 	if (tracing_disabled)
3356 		return 0;
3357 
3358 	/* Don't pollute graph traces with trace_vprintk internals */
3359 	pause_graph_tracing();
3360 
3361 	trace_ctx = tracing_gen_ctx();
3362 	preempt_disable_notrace();
3363 
3365 	tbuffer = get_trace_buf();
3366 	if (!tbuffer) {
3367 		len = 0;
3368 		goto out_nobuffer;
3369 	}
3370 
3371 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3372 
3373 	size = sizeof(*entry) + len + 1;
3374 	ring_buffer_nest_start(buffer);
3375 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3376 					    trace_ctx);
3377 	if (!event)
3378 		goto out;
3379 	entry = ring_buffer_event_data(event);
3380 	entry->ip = ip;
3381 
3382 	memcpy(&entry->buf, tbuffer, len + 1);
3383 	__buffer_unlock_commit(buffer, event);
3384 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3385 
3386 out:
3387 	ring_buffer_nest_end(buffer);
3388 	put_trace_buf();
3389 
3390 out_nobuffer:
3391 	preempt_enable_notrace();
3392 	unpause_graph_tracing();
3393 
3394 	return len;
3395 }
3396 
3397 __printf(3, 0)
3398 int trace_array_vprintk(struct trace_array *tr,
3399 			unsigned long ip, const char *fmt, va_list args)
3400 {
3401 	if (tracing_selftest_running && tr == &global_trace)
3402 		return 0;
3403 
3404 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3405 }
3406 
3407 /**
3408  * trace_array_printk - Print a message to a specific instance
3409  * @tr: The instance trace_array descriptor
3410  * @ip: The instruction pointer that this is called from.
3411  * @fmt: The format to print (printf format)
3412  *
3413  * If a subsystem sets up its own instance, they have the right to
3414  * printk strings into their tracing instance buffer using this
3415  * function. Note, this function will not write into the top level
3416  * buffer (use trace_printk() for that), as writing into the top level
3417  * buffer should only have events that can be individually disabled.
3418  * trace_printk() is only used for debugging a kernel, and should not
3419  * be ever incorporated in normal use.
3420  *
3421  * trace_array_printk() can be used, as it will not add noise to the
3422  * top level tracing buffer.
3423  *
3424  * Note, trace_array_init_printk() must be called on @tr before this
3425  * can be used.
3426  */
3427 __printf(3, 0)
3428 int trace_array_printk(struct trace_array *tr,
3429 		       unsigned long ip, const char *fmt, ...)
3430 {
3431 	int ret;
3432 	va_list ap;
3433 
3434 	if (!tr)
3435 		return -ENOENT;
3436 
3437 	/* This is only allowed for created instances */
3438 	if (tr == &global_trace)
3439 		return 0;
3440 
3441 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3442 		return 0;
3443 
3444 	va_start(ap, fmt);
3445 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3446 	va_end(ap);
3447 	return ret;
3448 }
3449 EXPORT_SYMBOL_GPL(trace_array_printk);
3450 
3451 /**
3452  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3453  * @tr: The trace array to initialize the buffers for
3454  *
3455  * As trace_array_printk() only writes into instances, it is OK to
3456  * have it in the kernel (unlike trace_printk()). This needs to be called
3457  * before trace_array_printk() can be used on a trace_array.
3458  */
3459 int trace_array_init_printk(struct trace_array *tr)
3460 {
3461 	if (!tr)
3462 		return -ENOENT;
3463 
3464 	/* This is only allowed for created instances */
3465 	if (tr == &global_trace)
3466 		return -EINVAL;
3467 
3468 	return alloc_percpu_trace_buffer();
3469 }
3470 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3471 
3472 __printf(3, 4)
3473 int trace_array_printk_buf(struct trace_buffer *buffer,
3474 			   unsigned long ip, const char *fmt, ...)
3475 {
3476 	int ret;
3477 	va_list ap;
3478 
3479 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3480 		return 0;
3481 
3482 	va_start(ap, fmt);
3483 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3484 	va_end(ap);
3485 	return ret;
3486 }
3487 
3488 __printf(2, 0)
3489 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3490 {
3491 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3492 }
3493 EXPORT_SYMBOL_GPL(trace_vprintk);
3494 
3495 static void trace_iterator_increment(struct trace_iterator *iter)
3496 {
3497 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3498 
3499 	iter->idx++;
3500 	if (buf_iter)
3501 		ring_buffer_iter_advance(buf_iter);
3502 }
3503 
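/*
 * Peek at the next entry for @cpu without consuming it, either through
 * the iterator's buffer iterator or ring_buffer_peek(). Updates
 * iter->ent_size and reports lost events when requested.
 */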
3504 static struct trace_entry *
3505 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3506 		unsigned long *lost_events)
3507 {
3508 	struct ring_buffer_event *event;
3509 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3510 
3511 	if (buf_iter) {
3512 		event = ring_buffer_iter_peek(buf_iter, ts);
3513 		if (lost_events)
3514 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3515 				(unsigned long)-1 : 0;
3516 	} else {
3517 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3518 					 lost_events);
3519 	}
3520 
3521 	if (event) {
3522 		iter->ent_size = ring_buffer_event_length(event);
3523 		return ring_buffer_event_data(event);
3524 	}
3525 	iter->ent_size = 0;
3526 	return NULL;
3527 }
3528 
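/*
 * Find the next entry to print: for a per-CPU trace file just peek that
 * CPU, otherwise scan all tracing CPUs and pick the pending entry with
 * the smallest timestamp, reporting its CPU, timestamp and lost events.
 */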
3529 static struct trace_entry *
3530 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3531 		  unsigned long *missing_events, u64 *ent_ts)
3532 {
3533 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3534 	struct trace_entry *ent, *next = NULL;
3535 	unsigned long lost_events = 0, next_lost = 0;
3536 	int cpu_file = iter->cpu_file;
3537 	u64 next_ts = 0, ts;
3538 	int next_cpu = -1;
3539 	int next_size = 0;
3540 	int cpu;
3541 
3542 	/*
3543 	 * If we are in a per_cpu trace file, don't bother iterating over
3544 	 * all CPUs; just peek at that CPU directly.
3545 	 */
3546 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3547 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3548 			return NULL;
3549 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3550 		if (ent_cpu)
3551 			*ent_cpu = cpu_file;
3552 
3553 		return ent;
3554 	}
3555 
3556 	for_each_tracing_cpu(cpu) {
3557 
3558 		if (ring_buffer_empty_cpu(buffer, cpu))
3559 			continue;
3560 
3561 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3562 
3563 		/*
3564 		 * Pick the entry with the smallest timestamp:
3565 		 */
3566 		if (ent && (!next || ts < next_ts)) {
3567 			next = ent;
3568 			next_cpu = cpu;
3569 			next_ts = ts;
3570 			next_lost = lost_events;
3571 			next_size = iter->ent_size;
3572 		}
3573 	}
3574 
3575 	iter->ent_size = next_size;
3576 
3577 	if (ent_cpu)
3578 		*ent_cpu = next_cpu;
3579 
3580 	if (ent_ts)
3581 		*ent_ts = next_ts;
3582 
3583 	if (missing_events)
3584 		*missing_events = next_lost;
3585 
3586 	return next;
3587 }
3588 
3589 #define STATIC_FMT_BUF_SIZE	128
3590 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3591 
3592 char *trace_iter_expand_format(struct trace_iterator *iter)
3593 {
3594 	char *tmp;
3595 
3596 	/*
3597 	 * iter->tr is NULL when used with tp_printk, which makes
3598 	 * this get called where it is not safe to call krealloc().
3599 	 */
3600 	if (!iter->tr || iter->fmt == static_fmt_buf)
3601 		return NULL;
3602 
3603 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3604 		       GFP_KERNEL);
3605 	if (tmp) {
3606 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3607 		iter->fmt = tmp;
3608 	}
3609 
3610 	return tmp;
3611 }
3612 
3613 /* Returns true if the string is safe to dereference from an event */
3614 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3615 {
3616 	unsigned long addr = (unsigned long)str;
3617 	struct trace_event *trace_event;
3618 	struct trace_event_call *event;
3619 
3620 	/* OK if part of the event data */
3621 	if ((addr >= (unsigned long)iter->ent) &&
3622 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3623 		return true;
3624 
3625 	/* OK if part of the temp seq buffer */
3626 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3627 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3628 		return true;
3629 
3630 	/* Core rodata can not be freed */
3631 	if (is_kernel_rodata(addr))
3632 		return true;
3633 
3634 	if (trace_is_tracepoint_string(str))
3635 		return true;
3636 
3637 	/*
3638 	 * Now this could be a module event, referencing core module
3639 	 * data, which is OK.
3640 	 */
3641 	if (!iter->ent)
3642 		return false;
3643 
3644 	trace_event = ftrace_find_event(iter->ent->type);
3645 	if (!trace_event)
3646 		return false;
3647 
3648 	event = container_of(trace_event, struct trace_event_call, event);
3649 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3650 		return false;
3651 
3652 	/* Would rather have rodata, but this will suffice */
3653 	if (within_module_core(addr, event->module))
3654 		return true;
3655 
3656 	return false;
3657 }
3658 
3659 /**
3660  * ignore_event - Check dereferenced fields while writing to the seq buffer
3661  * @iter: The iterator that holds the seq buffer and the event being printed
3662  *
3663  * At boot up, test_event_printk() will flag any event that dereferences
3664  * a string with "%s" that does not exist in the ring buffer. It may still
3665  * be valid, as the string may point to a static string in the kernel
3666  * rodata that never gets freed. But if the string pointer is pointing
3667  * to something that was allocated, there's a chance that it can be freed
3668  * by the time the user reads the trace. This would cause a bad memory
3669  * access by the kernel and possibly crash the system.
3670  *
3671  * This function will check if the event has any fields flagged as needing
3672  * to be checked at runtime and perform those checks.
3673  *
3674  * If it is found that a field is unsafe, it will write into the @iter->seq
3675  * a message stating what was found to be unsafe.
3676  *
3677  * @return: true if the event is unsafe and should be ignored,
3678  *          false otherwise.
3679  */
3680 bool ignore_event(struct trace_iterator *iter)
3681 {
3682 	struct ftrace_event_field *field;
3683 	struct trace_event *trace_event;
3684 	struct trace_event_call *event;
3685 	struct list_head *head;
3686 	struct trace_seq *seq;
3687 	const void *ptr;
3688 
3689 	trace_event = ftrace_find_event(iter->ent->type);
3690 
3691 	seq = &iter->seq;
3692 
3693 	if (!trace_event) {
3694 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3695 		return true;
3696 	}
3697 
3698 	event = container_of(trace_event, struct trace_event_call, event);
3699 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3700 		return false;
3701 
3702 	head = trace_get_fields(event);
3703 	if (!head) {
3704 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3705 				 trace_event_name(event));
3706 		return true;
3707 	}
3708 
3709 	/* Offsets are from the iter->ent that points to the raw event */
3710 	ptr = iter->ent;
3711 
3712 	list_for_each_entry(field, head, link) {
3713 		const char *str;
3714 		bool good;
3715 
3716 		if (!field->needs_test)
3717 			continue;
3718 
3719 		str = *(const char **)(ptr + field->offset);
3720 
3721 		good = trace_safe_str(iter, str);
3722 
3723 		/*
3724 		 * If you hit this warning, it is likely that the
3725 		 * trace event in question used %s on a string that
3726 		 * was saved at the time of the event, but may not be
3727 		 * around when the trace is read. Use __string(),
3728 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3729 		 * instead. See samples/trace_events/trace-events-sample.h
3730 		 * for reference.
3731 		 */
3732 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3733 			      trace_event_name(event), field->name)) {
3734 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3735 					 trace_event_name(event), field->name);
3736 			return true;
3737 		}
3738 	}
3739 	return false;
3740 }
3741 
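/*
 * If the "hash-ptr" trace option is cleared for this instance, rewrite the
 * event's format string so that %p pointers are printed unhashed (as %px),
 * using iter->fmt as scratch space. Otherwise the format is returned as-is.
 */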
3742 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3743 {
3744 	const char *p, *new_fmt;
3745 	char *q;
3746 
3747 	if (WARN_ON_ONCE(!fmt))
3748 		return fmt;
3749 
3750 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3751 		return fmt;
3752 
3753 	p = fmt;
3754 	new_fmt = q = iter->fmt;
3755 	while (*p) {
3756 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3757 			if (!trace_iter_expand_format(iter))
3758 				return fmt;
3759 
3760 			q += iter->fmt - new_fmt;
3761 			new_fmt = iter->fmt;
3762 		}
3763 
3764 		*q++ = *p++;
3765 
3766 		/* Replace %p with %px */
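		/* e.g. "%p " becomes "%px ", but "%pS" and a literal "%%" stay as-is */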
3767 		if (p[-1] == '%') {
3768 			if (p[0] == '%') {
3769 				*q++ = *p++;
3770 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3771 				*q++ = *p++;
3772 				*q++ = 'x';
3773 			}
3774 		}
3775 	}
3776 	*q = '\0';
3777 
3778 	return new_fmt;
3779 }
3780 
3781 #define STATIC_TEMP_BUF_SIZE	128
3782 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3783 
3784 /* Find the next real entry, without updating the iterator itself */
3785 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3786 					  int *ent_cpu, u64 *ent_ts)
3787 {
3788 	/* __find_next_entry will reset ent_size */
3789 	int ent_size = iter->ent_size;
3790 	struct trace_entry *entry;
3791 
3792 	/*
3793 	 * If called from ftrace_dump(), then the iter->temp buffer
3794 	 * will be the static_temp_buf and not created from kmalloc.
3795 	 * If the entry size is greater than the buffer, we cannot
3796 	 * save it. Just return NULL in that case. This is only
3797 	 * used to add markers when two consecutive events' time
3798 	 * stamps have a large delta. See trace_print_lat_context().
3799 	 */
3800 	if (iter->temp == static_temp_buf &&
3801 	    STATIC_TEMP_BUF_SIZE < ent_size)
3802 		return NULL;
3803 
3804 	/*
3805 	 * The __find_next_entry() may call peek_next_entry(), which may
3806 	 * call ring_buffer_peek() that may make the contents of iter->ent
3807 	 * undefined. Need to copy iter->ent now.
3808 	 */
3809 	if (iter->ent && iter->ent != iter->temp) {
3810 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3811 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3812 			void *temp;
3813 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3814 			if (!temp)
3815 				return NULL;
3816 			kfree(iter->temp);
3817 			iter->temp = temp;
3818 			iter->temp_size = iter->ent_size;
3819 		}
3820 		memcpy(iter->temp, iter->ent, iter->ent_size);
3821 		iter->ent = iter->temp;
3822 	}
3823 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3824 	/* Put back the original ent_size */
3825 	iter->ent_size = ent_size;
3826 
3827 	return entry;
3828 }
3829 
3830 /* Find the next real entry, and increment the iterator to the next entry */
3831 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3832 {
3833 	iter->ent = __find_next_entry(iter, &iter->cpu,
3834 				      &iter->lost_events, &iter->ts);
3835 
3836 	if (iter->ent)
3837 		trace_iterator_increment(iter);
3838 
3839 	return iter->ent ? iter : NULL;
3840 }
3841 
3842 static void trace_consume(struct trace_iterator *iter)
3843 {
3844 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3845 			    &iter->lost_events);
3846 }
3847 
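/*
 * seq_file .next callback: advance the trace iterator to the entry at
 * position *pos. Returns the iterator as the cursor, or NULL if the
 * requested position is behind the current one or the buffer runs out.
 */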
3848 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3849 {
3850 	struct trace_iterator *iter = m->private;
3851 	int i = (int)*pos;
3852 	void *ent;
3853 
3854 	WARN_ON_ONCE(iter->leftover);
3855 
3856 	(*pos)++;
3857 
3858 	/* can't go backwards */
3859 	if (iter->idx > i)
3860 		return NULL;
3861 
3862 	if (iter->idx < 0)
3863 		ent = trace_find_next_entry_inc(iter);
3864 	else
3865 		ent = iter;
3866 
3867 	while (ent && iter->idx < i)
3868 		ent = trace_find_next_entry_inc(iter);
3869 
3870 	iter->pos = *pos;
3871 
3872 	return ent;
3873 }
3874 
3875 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3876 {
3877 	struct ring_buffer_iter *buf_iter;
3878 	unsigned long entries = 0;
3879 	u64 ts;
3880 
3881 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3882 
3883 	buf_iter = trace_buffer_iter(iter, cpu);
3884 	if (!buf_iter)
3885 		return;
3886 
3887 	ring_buffer_iter_reset(buf_iter);
3888 
3889 	/*
3890 	 * With the max latency tracers, it is possible that a reset
3891 	 * never took place on a CPU. This is evident from the
3892 	 * timestamp being before the start of the buffer.
3893 	 */
3894 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3895 		if (ts >= iter->array_buffer->time_start)
3896 			break;
3897 		entries++;
3898 		ring_buffer_iter_advance(buf_iter);
3899 		/* This could be a big loop */
3900 		cond_resched();
3901 	}
3902 
3903 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3904 }
3905 
3906 /*
3907  * The current tracer is copied to avoid global locking
3908  * all around.
3909  */
3910 static void *s_start(struct seq_file *m, loff_t *pos)
3911 {
3912 	struct trace_iterator *iter = m->private;
3913 	struct trace_array *tr = iter->tr;
3914 	int cpu_file = iter->cpu_file;
3915 	void *p = NULL;
3916 	loff_t l = 0;
3917 	int cpu;
3918 
3919 	mutex_lock(&trace_types_lock);
3920 	if (unlikely(tr->current_trace != iter->trace)) {
3921 		/* Close iter->trace before switching to the new current tracer */
3922 		if (iter->trace->close)
3923 			iter->trace->close(iter);
3924 		iter->trace = tr->current_trace;
3925 		/* Reopen the new current tracer */
3926 		if (iter->trace->open)
3927 			iter->trace->open(iter);
3928 	}
3929 	mutex_unlock(&trace_types_lock);
3930 
3931 #ifdef CONFIG_TRACER_MAX_TRACE
3932 	if (iter->snapshot && iter->trace->use_max_tr)
3933 		return ERR_PTR(-EBUSY);
3934 #endif
3935 
3936 	if (*pos != iter->pos) {
3937 		iter->ent = NULL;
3938 		iter->cpu = 0;
3939 		iter->idx = -1;
3940 
3941 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3942 			for_each_tracing_cpu(cpu)
3943 				tracing_iter_reset(iter, cpu);
3944 		} else
3945 			tracing_iter_reset(iter, cpu_file);
3946 
3947 		iter->leftover = 0;
3948 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3949 			;
3950 
3951 	} else {
3952 		/*
3953 		 * If we overflowed the seq_file before, then we want
3954 		 * to just reuse the trace_seq buffer again.
3955 		 */
3956 		if (iter->leftover)
3957 			p = iter;
3958 		else {
3959 			l = *pos - 1;
3960 			p = s_next(m, p, &l);
3961 		}
3962 	}
3963 
3964 	trace_event_read_lock();
3965 	trace_access_lock(cpu_file);
3966 	return p;
3967 }
3968 
3969 static void s_stop(struct seq_file *m, void *p)
3970 {
3971 	struct trace_iterator *iter = m->private;
3972 
3973 #ifdef CONFIG_TRACER_MAX_TRACE
3974 	if (iter->snapshot && iter->trace->use_max_tr)
3975 		return;
3976 #endif
3977 
3978 	trace_access_unlock(iter->cpu_file);
3979 	trace_event_read_unlock();
3980 }
3981 
3982 static void
3983 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3984 		      unsigned long *entries, int cpu)
3985 {
3986 	unsigned long count;
3987 
3988 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3989 	/*
3990 	 * If this buffer has skipped entries, then we hold all
3991 	 * entries for the trace and we need to ignore the
3992 	 * ones before the time stamp.
3993 	 */
3994 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3995 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3996 		/* total is the same as the entries */
3997 		*total = count;
3998 	} else
3999 		*total = count +
4000 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4001 	*entries = count;
4002 }
4003 
4004 static void
4005 get_total_entries(struct array_buffer *buf,
4006 		  unsigned long *total, unsigned long *entries)
4007 {
4008 	unsigned long t, e;
4009 	int cpu;
4010 
4011 	*total = 0;
4012 	*entries = 0;
4013 
4014 	for_each_tracing_cpu(cpu) {
4015 		get_total_entries_cpu(buf, &t, &e, cpu);
4016 		*total += t;
4017 		*entries += e;
4018 	}
4019 }
4020 
4021 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4022 {
4023 	unsigned long total, entries;
4024 
4025 	if (!tr)
4026 		tr = &global_trace;
4027 
4028 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4029 
4030 	return entries;
4031 }
4032 
4033 unsigned long trace_total_entries(struct trace_array *tr)
4034 {
4035 	unsigned long total, entries;
4036 
4037 	if (!tr)
4038 		tr = &global_trace;
4039 
4040 	get_total_entries(&tr->array_buffer, &total, &entries);
4041 
4042 	return entries;
4043 }
4044 
4045 static void print_lat_help_header(struct seq_file *m)
4046 {
4047 	seq_puts(m, "#                    _------=> CPU#            \n"
4048 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4049 		    "#                  | / _----=> need-resched    \n"
4050 		    "#                  || / _---=> hardirq/softirq \n"
4051 		    "#                  ||| / _--=> preempt-depth   \n"
4052 		    "#                  |||| / _-=> migrate-disable \n"
4053 		    "#                  ||||| /     delay           \n"
4054 		    "#  cmd     pid     |||||| time  |   caller     \n"
4055 		    "#     \\   /        ||||||  \\    |    /       \n");
4056 }
4057 
4058 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4059 {
4060 	unsigned long total;
4061 	unsigned long entries;
4062 
4063 	get_total_entries(buf, &total, &entries);
4064 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4065 		   entries, total, num_online_cpus());
4066 	seq_puts(m, "#\n");
4067 }
4068 
4069 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4070 				   unsigned int flags)
4071 {
4072 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4073 
4074 	print_event_info(buf, m);
4075 
4076 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4077 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4078 }
4079 
4080 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4081 				       unsigned int flags)
4082 {
4083 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4084 	static const char space[] = "            ";
4085 	int prec = tgid ? 12 : 2;
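	/* With a TGID column the flag legend shifts right by 12 columns instead of 2 */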
4086 
4087 	print_event_info(buf, m);
4088 
4089 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4090 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4091 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4092 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4093 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4094 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4095 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4096 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4097 }
4098 
4099 void
4100 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4101 {
4102 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4103 	struct array_buffer *buf = iter->array_buffer;
4104 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4105 	struct tracer *type = iter->trace;
4106 	unsigned long entries;
4107 	unsigned long total;
4108 	const char *name = type->name;
4109 
4110 	get_total_entries(buf, &total, &entries);
4111 
4112 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4113 		   name, init_utsname()->release);
4114 	seq_puts(m, "# -----------------------------------"
4115 		 "---------------------------------\n");
4116 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4117 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4118 		   nsecs_to_usecs(data->saved_latency),
4119 		   entries,
4120 		   total,
4121 		   buf->cpu,
4122 		   preempt_model_str(),
4123 		   /* These are reserved for later use */
4124 		   0, 0, 0, 0);
4125 #ifdef CONFIG_SMP
4126 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4127 #else
4128 	seq_puts(m, ")\n");
4129 #endif
4130 	seq_puts(m, "#    -----------------\n");
4131 	seq_printf(m, "#    | task: %.16s-%d "
4132 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4133 		   data->comm, data->pid,
4134 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4135 		   data->policy, data->rt_priority);
4136 	seq_puts(m, "#    -----------------\n");
4137 
4138 	if (data->critical_start) {
4139 		seq_puts(m, "#  => started at: ");
4140 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4141 		trace_print_seq(m, &iter->seq);
4142 		seq_puts(m, "\n#  => ended at:   ");
4143 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4144 		trace_print_seq(m, &iter->seq);
4145 		seq_puts(m, "\n#\n");
4146 	}
4147 
4148 	seq_puts(m, "#\n");
4149 }
4150 
4151 static void test_cpu_buff_start(struct trace_iterator *iter)
4152 {
4153 	struct trace_seq *s = &iter->seq;
4154 	struct trace_array *tr = iter->tr;
4155 
4156 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4157 		return;
4158 
4159 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4160 		return;
4161 
4162 	if (cpumask_available(iter->started) &&
4163 	    cpumask_test_cpu(iter->cpu, iter->started))
4164 		return;
4165 
4166 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4167 		return;
4168 
4169 	if (cpumask_available(iter->started))
4170 		cpumask_set_cpu(iter->cpu, iter->started);
4171 
4172 	/* Don't print started cpu buffer for the first entry of the trace */
4173 	if (iter->idx > 1)
4174 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4175 				iter->cpu);
4176 }
4177 
4178 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4179 {
4180 	struct trace_array *tr = iter->tr;
4181 	struct trace_seq *s = &iter->seq;
4182 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4183 	struct trace_entry *entry;
4184 	struct trace_event *event;
4185 
4186 	entry = iter->ent;
4187 
4188 	test_cpu_buff_start(iter);
4189 
4190 	event = ftrace_find_event(entry->type);
4191 
4192 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4193 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4194 			trace_print_lat_context(iter);
4195 		else
4196 			trace_print_context(iter);
4197 	}
4198 
4199 	if (trace_seq_has_overflowed(s))
4200 		return TRACE_TYPE_PARTIAL_LINE;
4201 
4202 	if (event) {
4203 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4204 			return print_event_fields(iter, event);
4205 		/*
4206 		 * For TRACE_EVENT() events, the print_fmt is not
4207 		 * safe to use if the array has delta offsets.
4208 		 * Force printing via the fields.
4209 		 */
4210 		if ((tr->text_delta) &&
4211 		    event->type > __TRACE_LAST_TYPE)
4212 			return print_event_fields(iter, event);
4213 
4214 		return event->funcs->trace(iter, sym_flags, event);
4215 	}
4216 
4217 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4218 
4219 	return trace_handle_return(s);
4220 }
4221 
4222 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4223 {
4224 	struct trace_array *tr = iter->tr;
4225 	struct trace_seq *s = &iter->seq;
4226 	struct trace_entry *entry;
4227 	struct trace_event *event;
4228 
4229 	entry = iter->ent;
4230 
4231 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4232 		trace_seq_printf(s, "%d %d %llu ",
4233 				 entry->pid, iter->cpu, iter->ts);
4234 
4235 	if (trace_seq_has_overflowed(s))
4236 		return TRACE_TYPE_PARTIAL_LINE;
4237 
4238 	event = ftrace_find_event(entry->type);
4239 	if (event)
4240 		return event->funcs->raw(iter, 0, event);
4241 
4242 	trace_seq_printf(s, "%d ?\n", entry->type);
4243 
4244 	return trace_handle_return(s);
4245 }
4246 
4247 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4248 {
4249 	struct trace_array *tr = iter->tr;
4250 	struct trace_seq *s = &iter->seq;
4251 	unsigned char newline = '\n';
4252 	struct trace_entry *entry;
4253 	struct trace_event *event;
4254 
4255 	entry = iter->ent;
4256 
4257 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4258 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4259 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4260 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4261 		if (trace_seq_has_overflowed(s))
4262 			return TRACE_TYPE_PARTIAL_LINE;
4263 	}
4264 
4265 	event = ftrace_find_event(entry->type);
4266 	if (event) {
4267 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4268 		if (ret != TRACE_TYPE_HANDLED)
4269 			return ret;
4270 	}
4271 
4272 	SEQ_PUT_FIELD(s, newline);
4273 
4274 	return trace_handle_return(s);
4275 }
4276 
4277 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4278 {
4279 	struct trace_array *tr = iter->tr;
4280 	struct trace_seq *s = &iter->seq;
4281 	struct trace_entry *entry;
4282 	struct trace_event *event;
4283 
4284 	entry = iter->ent;
4285 
4286 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4287 		SEQ_PUT_FIELD(s, entry->pid);
4288 		SEQ_PUT_FIELD(s, iter->cpu);
4289 		SEQ_PUT_FIELD(s, iter->ts);
4290 		if (trace_seq_has_overflowed(s))
4291 			return TRACE_TYPE_PARTIAL_LINE;
4292 	}
4293 
4294 	event = ftrace_find_event(entry->type);
4295 	return event ? event->funcs->binary(iter, 0, event) :
4296 		TRACE_TYPE_HANDLED;
4297 }
4298 
4299 int trace_empty(struct trace_iterator *iter)
4300 {
4301 	struct ring_buffer_iter *buf_iter;
4302 	int cpu;
4303 
4304 	/* If we are looking at one CPU buffer, only check that one */
4305 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4306 		cpu = iter->cpu_file;
4307 		buf_iter = trace_buffer_iter(iter, cpu);
4308 		if (buf_iter) {
4309 			if (!ring_buffer_iter_empty(buf_iter))
4310 				return 0;
4311 		} else {
4312 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4313 				return 0;
4314 		}
4315 		return 1;
4316 	}
4317 
4318 	for_each_tracing_cpu(cpu) {
4319 		buf_iter = trace_buffer_iter(iter, cpu);
4320 		if (buf_iter) {
4321 			if (!ring_buffer_iter_empty(buf_iter))
4322 				return 0;
4323 		} else {
4324 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4325 				return 0;
4326 		}
4327 	}
4328 
4329 	return 1;
4330 }
4331 
4332 /*  Called with trace_event_read_lock() held. */
4333 enum print_line_t print_trace_line(struct trace_iterator *iter)
4334 {
4335 	struct trace_array *tr = iter->tr;
4336 	unsigned long trace_flags = tr->trace_flags;
4337 	enum print_line_t ret;
4338 
4339 	if (iter->lost_events) {
4340 		if (iter->lost_events == (unsigned long)-1)
4341 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4342 					 iter->cpu);
4343 		else
4344 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4345 					 iter->cpu, iter->lost_events);
4346 		if (trace_seq_has_overflowed(&iter->seq))
4347 			return TRACE_TYPE_PARTIAL_LINE;
4348 	}
4349 
4350 	if (iter->trace && iter->trace->print_line) {
4351 		ret = iter->trace->print_line(iter);
4352 		if (ret != TRACE_TYPE_UNHANDLED)
4353 			return ret;
4354 	}
4355 
4356 	if (iter->ent->type == TRACE_BPUTS &&
4357 			trace_flags & TRACE_ITER_PRINTK &&
4358 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4359 		return trace_print_bputs_msg_only(iter);
4360 
4361 	if (iter->ent->type == TRACE_BPRINT &&
4362 			trace_flags & TRACE_ITER_PRINTK &&
4363 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4364 		return trace_print_bprintk_msg_only(iter);
4365 
4366 	if (iter->ent->type == TRACE_PRINT &&
4367 			trace_flags & TRACE_ITER_PRINTK &&
4368 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4369 		return trace_print_printk_msg_only(iter);
4370 
4371 	if (trace_flags & TRACE_ITER_BIN)
4372 		return print_bin_fmt(iter);
4373 
4374 	if (trace_flags & TRACE_ITER_HEX)
4375 		return print_hex_fmt(iter);
4376 
4377 	if (trace_flags & TRACE_ITER_RAW)
4378 		return print_raw_fmt(iter);
4379 
4380 	return print_trace_fmt(iter);
4381 }
4382 
4383 void trace_latency_header(struct seq_file *m)
4384 {
4385 	struct trace_iterator *iter = m->private;
4386 	struct trace_array *tr = iter->tr;
4387 
4388 	/* print nothing if the buffers are empty */
4389 	if (trace_empty(iter))
4390 		return;
4391 
4392 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4393 		print_trace_header(m, iter);
4394 
4395 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4396 		print_lat_help_header(m);
4397 }
4398 
4399 void trace_default_header(struct seq_file *m)
4400 {
4401 	struct trace_iterator *iter = m->private;
4402 	struct trace_array *tr = iter->tr;
4403 	unsigned long trace_flags = tr->trace_flags;
4404 
4405 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4406 		return;
4407 
4408 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4409 		/* print nothing if the buffers are empty */
4410 		if (trace_empty(iter))
4411 			return;
4412 		print_trace_header(m, iter);
4413 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4414 			print_lat_help_header(m);
4415 	} else {
4416 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4417 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4418 				print_func_help_header_irq(iter->array_buffer,
4419 							   m, trace_flags);
4420 			else
4421 				print_func_help_header(iter->array_buffer, m,
4422 						       trace_flags);
4423 		}
4424 	}
4425 }
4426 
4427 static void test_ftrace_alive(struct seq_file *m)
4428 {
4429 	if (!ftrace_is_dead())
4430 		return;
4431 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4432 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4433 }
4434 
4435 #ifdef CONFIG_TRACER_MAX_TRACE
4436 static void show_snapshot_main_help(struct seq_file *m)
4437 {
4438 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4439 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4440 		    "#                      Takes a snapshot of the main buffer.\n"
4441 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4442 		    "#                      (Doesn't have to be '2' works with any number that\n"
4443 		    "#                       is not a '0' or '1')\n");
4444 }
4445 
4446 static void show_snapshot_percpu_help(struct seq_file *m)
4447 {
4448 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4449 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4450 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4451 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4452 #else
4453 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4454 		    "#                     Must use main snapshot file to allocate.\n");
4455 #endif
4456 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4457 		    "#                      (Doesn't have to be '2' works with any number that\n"
4458 		    "#                       is not a '0' or '1')\n");
4459 }
4460 
4461 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4462 {
4463 	if (iter->tr->allocated_snapshot)
4464 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4465 	else
4466 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4467 
4468 	seq_puts(m, "# Snapshot commands:\n");
4469 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4470 		show_snapshot_main_help(m);
4471 	else
4472 		show_snapshot_percpu_help(m);
4473 }
4474 #else
4475 /* Should never be called */
4476 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4477 #endif
4478 
4479 static int s_show(struct seq_file *m, void *v)
4480 {
4481 	struct trace_iterator *iter = v;
4482 	int ret;
4483 
4484 	if (iter->ent == NULL) {
4485 		if (iter->tr) {
4486 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4487 			seq_puts(m, "#\n");
4488 			test_ftrace_alive(m);
4489 		}
4490 		if (iter->snapshot && trace_empty(iter))
4491 			print_snapshot_help(m, iter);
4492 		else if (iter->trace && iter->trace->print_header)
4493 			iter->trace->print_header(m);
4494 		else
4495 			trace_default_header(m);
4496 
4497 	} else if (iter->leftover) {
4498 		/*
4499 		 * If we filled the seq_file buffer earlier, we
4500 		 * want to just show it now.
4501 		 */
4502 		ret = trace_print_seq(m, &iter->seq);
4503 
4504 		/* ret should this time be zero, but you never know */
4505 		iter->leftover = ret;
4506 
4507 	} else {
4508 		ret = print_trace_line(iter);
4509 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4510 			iter->seq.full = 0;
4511 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4512 		}
4513 		ret = trace_print_seq(m, &iter->seq);
4514 		/*
4515 		 * If we overflow the seq_file buffer, then it will
4516 		 * ask us for this data again at start up.
4517 		 * Use that instead.
4518 		 *  ret is 0 if seq_file write succeeded.
4519 		 *        -1 otherwise.
4520 		 */
4521 		iter->leftover = ret;
4522 	}
4523 
4524 	return 0;
4525 }
4526 
4527 /*
4528  * Should be used after trace_array_get(); trace_types_lock
4529  * ensures that i_cdev was already initialized.
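 * (trace_create_cpu_file() stores cpu + 1 in i_cdev, so the per-CPU file
 * for e.g. CPU 2 yields 2 here, while a NULL i_cdev means all CPUs.)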
4530  */
4531 static inline int tracing_get_cpu(struct inode *inode)
4532 {
4533 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4534 		return (long)inode->i_cdev - 1;
4535 	return RING_BUFFER_ALL_CPUS;
4536 }
4537 
4538 static const struct seq_operations tracer_seq_ops = {
4539 	.start		= s_start,
4540 	.next		= s_next,
4541 	.stop		= s_stop,
4542 	.show		= s_show,
4543 };
4544 
4545 /*
4546  * Note, as iter itself can be allocated and freed in different
4547  * ways, this function is only used to free its content, and not
4548  * the iterator itself. The only requirement for all the allocations
4549  * is that they must zero all fields (kzalloc), as freeing works with
4550  * either allocated content or NULL.
4551  */
4552 static void free_trace_iter_content(struct trace_iterator *iter)
4553 {
4554 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4555 	if (iter->fmt != static_fmt_buf)
4556 		kfree(iter->fmt);
4557 
4558 	kfree(iter->temp);
4559 	kfree(iter->buffer_iter);
4560 	mutex_destroy(&iter->mutex);
4561 	free_cpumask_var(iter->started);
4562 }
4563 
4564 static struct trace_iterator *
4565 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4566 {
4567 	struct trace_array *tr = inode->i_private;
4568 	struct trace_iterator *iter;
4569 	int cpu;
4570 
4571 	if (tracing_disabled)
4572 		return ERR_PTR(-ENODEV);
4573 
4574 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4575 	if (!iter)
4576 		return ERR_PTR(-ENOMEM);
4577 
4578 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4579 				    GFP_KERNEL);
4580 	if (!iter->buffer_iter)
4581 		goto release;
4582 
4583 	/*
4584 	 * trace_find_next_entry() may need to save off iter->ent.
4585 	 * It will place it into the iter->temp buffer. As most
4586 	 * events are less than 128 bytes, allocate a buffer of that size.
4587 	 * If one is greater, then trace_find_next_entry() will
4588 	 * allocate a new buffer to adjust for the bigger iter->ent.
4589 	 * It's not critical if it fails to get allocated here.
4590 	 */
4591 	iter->temp = kmalloc(128, GFP_KERNEL);
4592 	if (iter->temp)
4593 		iter->temp_size = 128;
4594 
4595 	/*
4596 	 * trace_event_printf() may need to modify the given format
4597 	 * string to replace %p with %px so that it shows the real address
4598 	 * instead of a hashed value. However, that is only needed for
4599 	 * event tracing; other tracers may not need it. Defer the
4600 	 * allocation until it is needed.
4601 	 */
4602 	iter->fmt = NULL;
4603 	iter->fmt_size = 0;
4604 
4605 	mutex_lock(&trace_types_lock);
4606 	iter->trace = tr->current_trace;
4607 
4608 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4609 		goto fail;
4610 
4611 	iter->tr = tr;
4612 
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 	/* Currently only the top directory has a snapshot */
4615 	if (tr->current_trace->print_max || snapshot)
4616 		iter->array_buffer = &tr->max_buffer;
4617 	else
4618 #endif
4619 		iter->array_buffer = &tr->array_buffer;
4620 	iter->snapshot = snapshot;
4621 	iter->pos = -1;
4622 	iter->cpu_file = tracing_get_cpu(inode);
4623 	mutex_init(&iter->mutex);
4624 
4625 	/* Notify the tracer early; before we stop tracing. */
4626 	if (iter->trace->open)
4627 		iter->trace->open(iter);
4628 
4629 	/* Annotate start of buffers if we had overruns */
4630 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4631 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4632 
4633 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4634 	if (trace_clocks[tr->clock_id].in_ns)
4635 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4636 
4637 	/*
4638 	 * If pause-on-trace is enabled, then stop the trace while
4639 	 * dumping, unless this is the "snapshot" file
4640 	 */
4641 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4642 		tracing_stop_tr(tr);
4643 
4644 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4645 		for_each_tracing_cpu(cpu) {
4646 			iter->buffer_iter[cpu] =
4647 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4648 							 cpu, GFP_KERNEL);
4649 		}
4650 		ring_buffer_read_prepare_sync();
4651 		for_each_tracing_cpu(cpu) {
4652 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4653 			tracing_iter_reset(iter, cpu);
4654 		}
4655 	} else {
4656 		cpu = iter->cpu_file;
4657 		iter->buffer_iter[cpu] =
4658 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4659 						 cpu, GFP_KERNEL);
4660 		ring_buffer_read_prepare_sync();
4661 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4662 		tracing_iter_reset(iter, cpu);
4663 	}
4664 
4665 	mutex_unlock(&trace_types_lock);
4666 
4667 	return iter;
4668 
4669  fail:
4670 	mutex_unlock(&trace_types_lock);
4671 	free_trace_iter_content(iter);
4672 release:
4673 	seq_release_private(inode, file);
4674 	return ERR_PTR(-ENOMEM);
4675 }
4676 
4677 int tracing_open_generic(struct inode *inode, struct file *filp)
4678 {
4679 	int ret;
4680 
4681 	ret = tracing_check_open_get_tr(NULL);
4682 	if (ret)
4683 		return ret;
4684 
4685 	filp->private_data = inode->i_private;
4686 	return 0;
4687 }
4688 
4689 bool tracing_is_disabled(void)
4690 {
4691 	return tracing_disabled ? true : false;
4692 }
4693 
4694 /*
4695  * Open and update trace_array ref count.
4696  * Must have the current trace_array passed to it.
4697  */
4698 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4699 {
4700 	struct trace_array *tr = inode->i_private;
4701 	int ret;
4702 
4703 	ret = tracing_check_open_get_tr(tr);
4704 	if (ret)
4705 		return ret;
4706 
4707 	filp->private_data = inode->i_private;
4708 
4709 	return 0;
4710 }
4711 
4712 /*
4713  * The private pointer of the inode is the trace_event_file.
4714  * Update the tr ref count associated to it.
4715  */
4716 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4717 {
4718 	struct trace_event_file *file = inode->i_private;
4719 	int ret;
4720 
4721 	ret = tracing_check_open_get_tr(file->tr);
4722 	if (ret)
4723 		return ret;
4724 
4725 	mutex_lock(&event_mutex);
4726 
4727 	/* Fail if the file is marked for removal */
4728 	if (file->flags & EVENT_FILE_FL_FREED) {
4729 		trace_array_put(file->tr);
4730 		ret = -ENODEV;
4731 	} else {
4732 		event_file_get(file);
4733 	}
4734 
4735 	mutex_unlock(&event_mutex);
4736 	if (ret)
4737 		return ret;
4738 
4739 	filp->private_data = inode->i_private;
4740 
4741 	return 0;
4742 }
4743 
4744 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4745 {
4746 	struct trace_event_file *file = inode->i_private;
4747 
4748 	trace_array_put(file->tr);
4749 	event_file_put(file);
4750 
4751 	return 0;
4752 }
4753 
4754 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 	tracing_release_file_tr(inode, filp);
4757 	return single_release(inode, filp);
4758 }
4759 
4760 static int tracing_mark_open(struct inode *inode, struct file *filp)
4761 {
4762 	stream_open(inode, filp);
4763 	return tracing_open_generic_tr(inode, filp);
4764 }
4765 
4766 static int tracing_release(struct inode *inode, struct file *file)
4767 {
4768 	struct trace_array *tr = inode->i_private;
4769 	struct seq_file *m = file->private_data;
4770 	struct trace_iterator *iter;
4771 	int cpu;
4772 
4773 	if (!(file->f_mode & FMODE_READ)) {
4774 		trace_array_put(tr);
4775 		return 0;
4776 	}
4777 
4778 	/* Writes do not use seq_file */
4779 	iter = m->private;
4780 	mutex_lock(&trace_types_lock);
4781 
4782 	for_each_tracing_cpu(cpu) {
4783 		if (iter->buffer_iter[cpu])
4784 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4785 	}
4786 
4787 	if (iter->trace && iter->trace->close)
4788 		iter->trace->close(iter);
4789 
4790 	if (!iter->snapshot && tr->stop_count)
4791 		/* reenable tracing if it was previously enabled */
4792 		tracing_start_tr(tr);
4793 
4794 	__trace_array_put(tr);
4795 
4796 	mutex_unlock(&trace_types_lock);
4797 
4798 	free_trace_iter_content(iter);
4799 	seq_release_private(inode, file);
4800 
4801 	return 0;
4802 }
4803 
4804 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4805 {
4806 	struct trace_array *tr = inode->i_private;
4807 
4808 	trace_array_put(tr);
4809 	return 0;
4810 }
4811 
4812 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4813 {
4814 	struct trace_array *tr = inode->i_private;
4815 
4816 	trace_array_put(tr);
4817 
4818 	return single_release(inode, file);
4819 }
4820 
4821 static int tracing_open(struct inode *inode, struct file *file)
4822 {
4823 	struct trace_array *tr = inode->i_private;
4824 	struct trace_iterator *iter;
4825 	int ret;
4826 
4827 	ret = tracing_check_open_get_tr(tr);
4828 	if (ret)
4829 		return ret;
4830 
4831 	/* If this file was open for write, then erase contents */
4832 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4833 		int cpu = tracing_get_cpu(inode);
4834 		struct array_buffer *trace_buf = &tr->array_buffer;
4835 
4836 #ifdef CONFIG_TRACER_MAX_TRACE
4837 		if (tr->current_trace->print_max)
4838 			trace_buf = &tr->max_buffer;
4839 #endif
4840 
4841 		if (cpu == RING_BUFFER_ALL_CPUS)
4842 			tracing_reset_online_cpus(trace_buf);
4843 		else
4844 			tracing_reset_cpu(trace_buf, cpu);
4845 	}
4846 
4847 	if (file->f_mode & FMODE_READ) {
4848 		iter = __tracing_open(inode, file, false);
4849 		if (IS_ERR(iter))
4850 			ret = PTR_ERR(iter);
4851 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4852 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4853 	}
4854 
4855 	if (ret < 0)
4856 		trace_array_put(tr);
4857 
4858 	return ret;
4859 }
4860 
4861 /*
4862  * Some tracers are not suitable for instance buffers.
4863  * A tracer is always available for the global array (toplevel)
4864  * or if it explicitly states that it is.
4865  */
4866 static bool
4867 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4868 {
4869 #ifdef CONFIG_TRACER_SNAPSHOT
4870 	/* arrays with mapped buffer range do not have snapshots */
4871 	if (tr->range_addr_start && t->use_max_tr)
4872 		return false;
4873 #endif
4874 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4875 }
4876 
4877 /* Find the next tracer that this trace array may use */
4878 static struct tracer *
4879 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4880 {
4881 	while (t && !trace_ok_for_array(t, tr))
4882 		t = t->next;
4883 
4884 	return t;
4885 }
4886 
4887 static void *
4888 t_next(struct seq_file *m, void *v, loff_t *pos)
4889 {
4890 	struct trace_array *tr = m->private;
4891 	struct tracer *t = v;
4892 
4893 	(*pos)++;
4894 
4895 	if (t)
4896 		t = get_tracer_for_array(tr, t->next);
4897 
4898 	return t;
4899 }
4900 
4901 static void *t_start(struct seq_file *m, loff_t *pos)
4902 {
4903 	struct trace_array *tr = m->private;
4904 	struct tracer *t;
4905 	loff_t l = 0;
4906 
4907 	mutex_lock(&trace_types_lock);
4908 
4909 	t = get_tracer_for_array(tr, trace_types);
4910 	for (; t && l < *pos; t = t_next(m, t, &l))
4911 		;
4912 
4913 	return t;
4914 }
4915 
4916 static void t_stop(struct seq_file *m, void *p)
4917 {
4918 	mutex_unlock(&trace_types_lock);
4919 }
4920 
4921 static int t_show(struct seq_file *m, void *v)
4922 {
4923 	struct tracer *t = v;
4924 
4925 	if (!t)
4926 		return 0;
4927 
4928 	seq_puts(m, t->name);
4929 	if (t->next)
4930 		seq_putc(m, ' ');
4931 	else
4932 		seq_putc(m, '\n');
4933 
4934 	return 0;
4935 }
4936 
4937 static const struct seq_operations show_traces_seq_ops = {
4938 	.start		= t_start,
4939 	.next		= t_next,
4940 	.stop		= t_stop,
4941 	.show		= t_show,
4942 };
4943 
4944 static int show_traces_open(struct inode *inode, struct file *file)
4945 {
4946 	struct trace_array *tr = inode->i_private;
4947 	struct seq_file *m;
4948 	int ret;
4949 
4950 	ret = tracing_check_open_get_tr(tr);
4951 	if (ret)
4952 		return ret;
4953 
4954 	ret = seq_open(file, &show_traces_seq_ops);
4955 	if (ret) {
4956 		trace_array_put(tr);
4957 		return ret;
4958 	}
4959 
4960 	m = file->private_data;
4961 	m->private = tr;
4962 
4963 	return 0;
4964 }
4965 
4966 static int tracing_seq_release(struct inode *inode, struct file *file)
4967 {
4968 	struct trace_array *tr = inode->i_private;
4969 
4970 	trace_array_put(tr);
4971 	return seq_release(inode, file);
4972 }
4973 
4974 static ssize_t
4975 tracing_write_stub(struct file *filp, const char __user *ubuf,
4976 		   size_t count, loff_t *ppos)
4977 {
4978 	return count;
4979 }
4980 
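/*
 * Readers have seq_file state and go through seq_lseek(); files opened
 * write-only have none, so simply reset the file position.
 */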
4981 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4982 {
4983 	int ret;
4984 
4985 	if (file->f_mode & FMODE_READ)
4986 		ret = seq_lseek(file, offset, whence);
4987 	else
4988 		file->f_pos = ret = 0;
4989 
4990 	return ret;
4991 }
4992 
4993 static const struct file_operations tracing_fops = {
4994 	.open		= tracing_open,
4995 	.read		= seq_read,
4996 	.read_iter	= seq_read_iter,
4997 	.splice_read	= copy_splice_read,
4998 	.write		= tracing_write_stub,
4999 	.llseek		= tracing_lseek,
5000 	.release	= tracing_release,
5001 };
5002 
5003 static const struct file_operations show_traces_fops = {
5004 	.open		= show_traces_open,
5005 	.read		= seq_read,
5006 	.llseek		= seq_lseek,
5007 	.release	= tracing_seq_release,
5008 };
5009 
5010 static ssize_t
5011 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5012 		     size_t count, loff_t *ppos)
5013 {
5014 	struct trace_array *tr = file_inode(filp)->i_private;
5015 	char *mask_str;
5016 	int len;
5017 
5018 	len = snprintf(NULL, 0, "%*pb\n",
5019 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5020 	mask_str = kmalloc(len, GFP_KERNEL);
5021 	if (!mask_str)
5022 		return -ENOMEM;
5023 
5024 	len = snprintf(mask_str, len, "%*pb\n",
5025 		       cpumask_pr_args(tr->tracing_cpumask));
5026 	if (len >= count) {
5027 		count = -EINVAL;
5028 		goto out_err;
5029 	}
5030 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5031 
5032 out_err:
5033 	kfree(mask_str);
5034 
5035 	return count;
5036 }
5037 
5038 int tracing_set_cpumask(struct trace_array *tr,
5039 			cpumask_var_t tracing_cpumask_new)
5040 {
5041 	int cpu;
5042 
5043 	if (!tr)
5044 		return -EINVAL;
5045 
5046 	local_irq_disable();
5047 	arch_spin_lock(&tr->max_lock);
5048 	for_each_tracing_cpu(cpu) {
5049 		/*
5050 		 * Increase/decrease the disabled counter if we are
5051 		 * about to flip a bit in the cpumask:
5052 		 */
5053 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5054 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5055 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5056 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5057 #ifdef CONFIG_TRACER_MAX_TRACE
5058 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5059 #endif
5060 		}
5061 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5062 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5063 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5064 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5065 #ifdef CONFIG_TRACER_MAX_TRACE
5066 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5067 #endif
5068 		}
5069 	}
5070 	arch_spin_unlock(&tr->max_lock);
5071 	local_irq_enable();
5072 
5073 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5074 
5075 	return 0;
5076 }
5077 
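/*
 * Parse a new tracing cpumask from user space (hex mask format), e.g.
 * "echo 3 > tracing_cpumask" limits tracing to CPUs 0 and 1.
 */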
5078 static ssize_t
5079 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5080 		      size_t count, loff_t *ppos)
5081 {
5082 	struct trace_array *tr = file_inode(filp)->i_private;
5083 	cpumask_var_t tracing_cpumask_new;
5084 	int err;
5085 
5086 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5087 		return -EINVAL;
5088 
5089 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5090 		return -ENOMEM;
5091 
5092 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5093 	if (err)
5094 		goto err_free;
5095 
5096 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5097 	if (err)
5098 		goto err_free;
5099 
5100 	free_cpumask_var(tracing_cpumask_new);
5101 
5102 	return count;
5103 
5104 err_free:
5105 	free_cpumask_var(tracing_cpumask_new);
5106 
5107 	return err;
5108 }
5109 
5110 static const struct file_operations tracing_cpumask_fops = {
5111 	.open		= tracing_open_generic_tr,
5112 	.read		= tracing_cpumask_read,
5113 	.write		= tracing_cpumask_write,
5114 	.release	= tracing_release_generic_tr,
5115 	.llseek		= generic_file_llseek,
5116 };
5117 
5118 static int tracing_trace_options_show(struct seq_file *m, void *v)
5119 {
5120 	struct tracer_opt *trace_opts;
5121 	struct trace_array *tr = m->private;
5122 	u32 tracer_flags;
5123 	int i;
5124 
5125 	guard(mutex)(&trace_types_lock);
5126 
5127 	tracer_flags = tr->current_trace->flags->val;
5128 	trace_opts = tr->current_trace->flags->opts;
5129 
5130 	for (i = 0; trace_options[i]; i++) {
5131 		if (tr->trace_flags & (1 << i))
5132 			seq_printf(m, "%s\n", trace_options[i]);
5133 		else
5134 			seq_printf(m, "no%s\n", trace_options[i]);
5135 	}
5136 
5137 	for (i = 0; trace_opts[i].name; i++) {
5138 		if (tracer_flags & trace_opts[i].bit)
5139 			seq_printf(m, "%s\n", trace_opts[i].name);
5140 		else
5141 			seq_printf(m, "no%s\n", trace_opts[i].name);
5142 	}
5143 
5144 	return 0;
5145 }
5146 
5147 static int __set_tracer_option(struct trace_array *tr,
5148 			       struct tracer_flags *tracer_flags,
5149 			       struct tracer_opt *opts, int neg)
5150 {
5151 	struct tracer *trace = tracer_flags->trace;
5152 	int ret;
5153 
5154 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5155 	if (ret)
5156 		return ret;
5157 
5158 	if (neg)
5159 		tracer_flags->val &= ~opts->bit;
5160 	else
5161 		tracer_flags->val |= opts->bit;
5162 	return 0;
5163 }
5164 
5165 /* Try to assign a tracer specific option */
5166 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5167 {
5168 	struct tracer *trace = tr->current_trace;
5169 	struct tracer_flags *tracer_flags = trace->flags;
5170 	struct tracer_opt *opts = NULL;
5171 	int i;
5172 
5173 	for (i = 0; tracer_flags->opts[i].name; i++) {
5174 		opts = &tracer_flags->opts[i];
5175 
5176 		if (strcmp(cmp, opts->name) == 0)
5177 			return __set_tracer_option(tr, trace->flags, opts, neg);
5178 	}
5179 
5180 	return -EINVAL;
5181 }
5182 
5183 /* Some tracers require overwrite to stay enabled */
5184 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5185 {
5186 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5187 		return -1;
5188 
5189 	return 0;
5190 }
5191 
5192 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5193 {
5194 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5195 	    (mask == TRACE_ITER_RECORD_CMD) ||
5196 	    (mask == TRACE_ITER_TRACE_PRINTK))
5197 		lockdep_assert_held(&event_mutex);
5198 
5199 	/* do nothing if flag is already set */
5200 	if (!!(tr->trace_flags & mask) == !!enabled)
5201 		return 0;
5202 
5203 	/* Give the tracer a chance to approve the change */
5204 	if (tr->current_trace->flag_changed)
5205 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5206 			return -EINVAL;
5207 
5208 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5209 		if (enabled) {
5210 			update_printk_trace(tr);
5211 		} else {
5212 			/*
5213 			 * The global_trace cannot clear this.
5214 			 * Its flag only gets cleared if another instance sets it.
5215 			 */
5216 			if (printk_trace == &global_trace)
5217 				return -EINVAL;
5218 			/*
5219 			 * An instance must always have it set;
5220 			 * by default, that's the global_trace instance.
5221 			 */
5222 			if (printk_trace == tr)
5223 				update_printk_trace(&global_trace);
5224 		}
5225 	}
5226 
5227 	if (enabled)
5228 		tr->trace_flags |= mask;
5229 	else
5230 		tr->trace_flags &= ~mask;
5231 
5232 	if (mask == TRACE_ITER_RECORD_CMD)
5233 		trace_event_enable_cmd_record(enabled);
5234 
5235 	if (mask == TRACE_ITER_RECORD_TGID) {
5236 
5237 		if (trace_alloc_tgid_map() < 0) {
5238 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5239 			return -ENOMEM;
5240 		}
5241 
5242 		trace_event_enable_tgid_record(enabled);
5243 	}
5244 
5245 	if (mask == TRACE_ITER_EVENT_FORK)
5246 		trace_event_follow_fork(tr, enabled);
5247 
5248 	if (mask == TRACE_ITER_FUNC_FORK)
5249 		ftrace_pid_follow_fork(tr, enabled);
5250 
5251 	if (mask == TRACE_ITER_OVERWRITE) {
5252 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5253 #ifdef CONFIG_TRACER_MAX_TRACE
5254 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5255 #endif
5256 	}
5257 
5258 	if (mask == TRACE_ITER_PRINTK) {
5259 		trace_printk_start_stop_comm(enabled);
5260 		trace_printk_control(enabled);
5261 	}
5262 
5263 	return 0;
5264 }
5265 
5266 int trace_set_options(struct trace_array *tr, char *option)
5267 {
5268 	char *cmp;
5269 	int neg = 0;
5270 	int ret;
5271 	size_t orig_len = strlen(option);
5272 	int len;
5273 
5274 	cmp = strstrip(option);
5275 
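	/* A leading "no" negates the option, e.g. "noprint-parent" clears print-parent */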
5276 	len = str_has_prefix(cmp, "no");
5277 	if (len)
5278 		neg = 1;
5279 
5280 	cmp += len;
5281 
5282 	mutex_lock(&event_mutex);
5283 	mutex_lock(&trace_types_lock);
5284 
5285 	ret = match_string(trace_options, -1, cmp);
5286 	/* If the option is not a general trace option, try the tracer-specific options */
5287 	if (ret < 0)
5288 		ret = set_tracer_option(tr, cmp, neg);
5289 	else
5290 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5291 
5292 	mutex_unlock(&trace_types_lock);
5293 	mutex_unlock(&event_mutex);
5294 
5295 	/*
5296 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5297 	 * turn it back into a space.
5298 	 */
5299 	if (orig_len > strlen(option))
5300 		option[strlen(option)] = ' ';
5301 
5302 	return ret;
5303 }
5304 
5305 static void __init apply_trace_boot_options(void)
5306 {
5307 	char *buf = trace_boot_options_buf;
5308 	char *option;
5309 
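	/* The buffer holds a comma-separated list from the command line, e.g. "sym-addr,stacktrace" */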
5310 	while (true) {
5311 		option = strsep(&buf, ",");
5312 
5313 		if (!option)
5314 			break;
5315 
5316 		if (*option)
5317 			trace_set_options(&global_trace, option);
5318 
5319 		/* Put back the comma to allow this to be called again */
5320 		if (buf)
5321 			*(buf - 1) = ',';
5322 	}
5323 }
5324 
5325 static ssize_t
5326 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5327 			size_t cnt, loff_t *ppos)
5328 {
5329 	struct seq_file *m = filp->private_data;
5330 	struct trace_array *tr = m->private;
5331 	char buf[64];
5332 	int ret;
5333 
5334 	if (cnt >= sizeof(buf))
5335 		return -EINVAL;
5336 
5337 	if (copy_from_user(buf, ubuf, cnt))
5338 		return -EFAULT;
5339 
5340 	buf[cnt] = 0;
5341 
5342 	ret = trace_set_options(tr, buf);
5343 	if (ret < 0)
5344 		return ret;
5345 
5346 	*ppos += cnt;
5347 
5348 	return cnt;
5349 }
5350 
5351 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5352 {
5353 	struct trace_array *tr = inode->i_private;
5354 	int ret;
5355 
5356 	ret = tracing_check_open_get_tr(tr);
5357 	if (ret)
5358 		return ret;
5359 
5360 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5361 	if (ret < 0)
5362 		trace_array_put(tr);
5363 
5364 	return ret;
5365 }
5366 
5367 static const struct file_operations tracing_iter_fops = {
5368 	.open		= tracing_trace_options_open,
5369 	.read		= seq_read,
5370 	.llseek		= seq_lseek,
5371 	.release	= tracing_single_release_tr,
5372 	.write		= tracing_trace_options_write,
5373 };
5374 
5375 static const char readme_msg[] =
5376 	"tracing mini-HOWTO:\n\n"
5377 	"By default tracefs removes all OTH file permission bits.\n"
5378 	"When mounting tracefs an optional group id can be specified\n"
5379 	"which adds the group to every directory and file in tracefs:\n\n"
5380 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5381 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5382 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5383 	" Important files:\n"
5384 	"  trace\t\t\t- The static contents of the buffer\n"
5385 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5386 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5387 	"  current_tracer\t- function and latency tracers\n"
5388 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5389 	"  error_log\t- error log for failed commands (that support it)\n"
5390 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5391 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5392 	"  trace_clock\t\t- change the clock used to order events\n"
5393 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5394 	"      global:   Synced across CPUs but slows tracing down.\n"
5395 	"     counter:   Not a clock, but just an increment\n"
5396 	"      uptime:   Jiffy counter from time of boot\n"
5397 	"        perf:   Same clock that perf events use\n"
5398 #ifdef CONFIG_X86_64
5399 	"     x86-tsc:   TSC cycle counter\n"
5400 #endif
5401 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5402 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5403 	"    absolute:   Absolute (standalone) timestamp\n"
5404 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5405 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5406 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5407 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5408 	"\t\t\t  Remove sub-buffer with rmdir\n"
5409 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5410 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5411 	"\t\t\t  option name\n"
5412 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5413 #ifdef CONFIG_DYNAMIC_FTRACE
5414 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5415 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5416 	"\t\t\t  functions\n"
5417 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5418 	"\t     modules: Can select a group via module\n"
5419 	"\t      Format: :mod:<module-name>\n"
5420 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5421 	"\t    triggers: a command to perform when function is hit\n"
5422 	"\t      Format: <function>:<trigger>[:count]\n"
5423 	"\t     trigger: traceon, traceoff\n"
5424 	"\t\t      enable_event:<system>:<event>\n"
5425 	"\t\t      disable_event:<system>:<event>\n"
5426 #ifdef CONFIG_STACKTRACE
5427 	"\t\t      stacktrace\n"
5428 #endif
5429 #ifdef CONFIG_TRACER_SNAPSHOT
5430 	"\t\t      snapshot\n"
5431 #endif
5432 	"\t\t      dump\n"
5433 	"\t\t      cpudump\n"
5434 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5435 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5436 	"\t     The first one will disable tracing every time do_fault is hit\n"
5437 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5438 	"\t       The first time do trap is hit and it disables tracing, the\n"
5439 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5440 	"\t       the counter will not decrement. It only decrements when the\n"
5441 	"\t       trigger did work\n"
5442 	"\t     To remove trigger without count:\n"
5443 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5444 	"\t     To remove trigger with a count:\n"
5445 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5446 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5447 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5448 	"\t    modules: Can select a group via module command :mod:\n"
5449 	"\t    Does not accept triggers\n"
5450 #endif /* CONFIG_DYNAMIC_FTRACE */
5451 #ifdef CONFIG_FUNCTION_TRACER
5452 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5453 	"\t\t    (function)\n"
5454 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5455 	"\t\t    (function)\n"
5456 #endif
5457 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5458 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5459 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5460 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5461 #endif
5462 #ifdef CONFIG_TRACER_SNAPSHOT
5463 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5464 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5465 	"\t\t\t  information\n"
5466 #endif
5467 #ifdef CONFIG_STACK_TRACER
5468 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5469 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5470 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5471 	"\t\t\t  new trace)\n"
5472 #ifdef CONFIG_DYNAMIC_FTRACE
5473 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5474 	"\t\t\t  traces\n"
5475 #endif
5476 #endif /* CONFIG_STACK_TRACER */
5477 #ifdef CONFIG_DYNAMIC_EVENTS
5478 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5479 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5480 #endif
5481 #ifdef CONFIG_KPROBE_EVENTS
5482 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5483 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5484 #endif
5485 #ifdef CONFIG_UPROBE_EVENTS
5486 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5487 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5488 #endif
5489 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5490     defined(CONFIG_FPROBE_EVENTS)
5491 	"\t  accepts: event-definitions (one definition per line)\n"
5492 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5493 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5494 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5495 #endif
5496 #ifdef CONFIG_FPROBE_EVENTS
5497 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5498 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5499 #endif
5500 #ifdef CONFIG_HIST_TRIGGERS
5501 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5502 #endif
5503 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5504 	"\t           -:[<group>/][<event>]\n"
5505 #ifdef CONFIG_KPROBE_EVENTS
5506 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5507   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5508 #endif
5509 #ifdef CONFIG_UPROBE_EVENTS
5510   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5511 #endif
5512 	"\t     args: <name>=fetcharg[:type]\n"
5513 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5514 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5515 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5516 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5517 	"\t           <argname>[->field[->field|.field...]],\n"
5518 #endif
5519 #else
5520 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5521 #endif
5522 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5523 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5524 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5525 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5526 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5527 #ifdef CONFIG_HIST_TRIGGERS
5528 	"\t    field: <stype> <name>;\n"
5529 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5530 	"\t           [unsigned] char/int/long\n"
5531 #endif
5532 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5533 	"\t            of the <attached-group>/<attached-event>.\n"
5534 #endif
5535 	"  set_event\t\t- Enables events by name written into it\n"
5536 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5537 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5538 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5539 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5540 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5541 	"\t\t\t  events\n"
5542 	"      filter\t\t- If set, only events passing filter are traced\n"
5543 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5544 	"\t\t\t  <event>:\n"
5545 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5546 	"      filter\t\t- If set, only events passing filter are traced\n"
5547 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5548 	"\t    Format: <trigger>[:count][if <filter>]\n"
5549 	"\t   trigger: traceon, traceoff\n"
5550 	"\t            enable_event:<system>:<event>\n"
5551 	"\t            disable_event:<system>:<event>\n"
5552 #ifdef CONFIG_HIST_TRIGGERS
5553 	"\t            enable_hist:<system>:<event>\n"
5554 	"\t            disable_hist:<system>:<event>\n"
5555 #endif
5556 #ifdef CONFIG_STACKTRACE
5557 	"\t\t    stacktrace\n"
5558 #endif
5559 #ifdef CONFIG_TRACER_SNAPSHOT
5560 	"\t\t    snapshot\n"
5561 #endif
5562 #ifdef CONFIG_HIST_TRIGGERS
5563 	"\t\t    hist (see below)\n"
5564 #endif
5565 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5566 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5567 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5568 	"\t                  events/block/block_unplug/trigger\n"
5569 	"\t   The first disables tracing every time block_unplug is hit.\n"
5570 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5571 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5572 	"\t     is hit and the 'nr_rq' event field has a value greater than 1.\n"
5573 	"\t   Like function triggers, the counter is only decremented if it\n"
5574 	"\t    enabled or disabled tracing.\n"
5575 	"\t   To remove a trigger without a count:\n"
5576 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5577 	"\t   To remove a trigger with a count:\n"
5578 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5579 	"\t   Filters can be ignored when removing a trigger.\n"
5580 #ifdef CONFIG_HIST_TRIGGERS
5581 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5582 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5583 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5584 	"\t            [:values=<field1[,field2,...]>]\n"
5585 	"\t            [:sort=<field1[,field2,...]>]\n"
5586 	"\t            [:size=#entries]\n"
5587 	"\t            [:pause][:continue][:clear]\n"
5588 	"\t            [:name=histname1]\n"
5589 	"\t            [:nohitcount]\n"
5590 	"\t            [:<handler>.<action>]\n"
5591 	"\t            [if <filter>]\n\n"
5592 	"\t    Note, special fields can be used as well:\n"
5593 	"\t            common_timestamp - to record current timestamp\n"
5594 	"\t            common_cpu - to record the CPU the event happened on\n"
5595 	"\n"
5596 	"\t    A hist trigger variable can be:\n"
5597 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5598 	"\t        - a reference to another variable e.g. y=$x,\n"
5599 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5600 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5601 	"\n"
5602 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5603 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5604 	"\t    variable reference, field or numeric literal.\n"
5605 	"\n"
5606 	"\t    When a matching event is hit, an entry is added to a hash\n"
5607 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5608 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5609 	"\t    correspond to fields in the event's format description.  Keys\n"
5610 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5611 	"\t    Compound keys consisting of up to two fields can be specified\n"
5612 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5613 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5614 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5615 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5616 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5617 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5618 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5619 	"\t    its histogram data will be shared with other triggers of the\n"
5620 	"\t    same name, and trigger hits will update this common data.\n\n"
5621 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5622 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5623 	"\t    triggers attached to an event, there will be a table for each\n"
5624 	"\t    trigger in the output.  The table displayed for a named\n"
5625 	"\t    trigger will be the same as any other instance having the\n"
5626 	"\t    same name.  The default format used to display a given field\n"
5627 	"\t    can be modified by appending any of the following modifiers\n"
5628 	"\t    to the field name, as applicable:\n\n"
5629 	"\t            .hex        display a number as a hex value\n"
5630 	"\t            .sym        display an address as a symbol\n"
5631 	"\t            .sym-offset display an address as a symbol and offset\n"
5632 	"\t            .execname   display a common_pid as a program name\n"
5633 	"\t            .syscall    display a syscall id as a syscall name\n"
5634 	"\t            .log2       display log2 value rather than raw number\n"
5635 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5636 	"\t            .usecs      display a common_timestamp in microseconds\n"
5637 	"\t            .percent    display a number as a percentage value\n"
5638 	"\t            .graph      display a bar-graph of a value\n\n"
5639 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5640 	"\t    trigger or to start a hist trigger but not log any events\n"
5641 	"\t    until told to do so.  'continue' can be used to start or\n"
5642 	"\t    restart a paused hist trigger.\n\n"
5643 	"\t    The 'clear' parameter will clear the contents of a running\n"
5644 	"\t    hist trigger and leave its current paused/active state\n"
5645 	"\t    unchanged.\n\n"
5646 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5647 	"\t    raw hitcount in the histogram.\n\n"
5648 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5649 	"\t    have one event conditionally start and stop another event's\n"
5650 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5651 	"\t    the enable_event and disable_event triggers.\n\n"
5652 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5653 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5654 	"\t        <handler>.<action>\n\n"
5655 	"\t    The available handlers are:\n\n"
5656 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5657 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5658 	"\t        onchange(var)            - invoke action if var changes\n\n"
5659 	"\t    The available actions are:\n\n"
5660 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5661 	"\t        save(field,...)                      - save current event fields\n"
5662 #ifdef CONFIG_TRACER_SNAPSHOT
5663 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5664 #endif
5665 #ifdef CONFIG_SYNTH_EVENTS
5666 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5667 	"\t  Write into this file to define/undefine new synthetic events.\n"
5668 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5669 #endif
5670 #endif
5671 ;
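
/*
 * Illustrative (not compiled) shell sketch of the hist trigger syntax that
 * the readme text above describes.  The kmem:kmalloc event and its
 * call_site/bytes_req fields are only an example target:
 *
 *   # aggregate kmalloc request sizes per call site, largest totals first
 *   echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *       > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 */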
5672 
5673 static ssize_t
5674 tracing_readme_read(struct file *filp, char __user *ubuf,
5675 		       size_t cnt, loff_t *ppos)
5676 {
5677 	return simple_read_from_buffer(ubuf, cnt, ppos,
5678 					readme_msg, strlen(readme_msg));
5679 }
5680 
5681 static const struct file_operations tracing_readme_fops = {
5682 	.open		= tracing_open_generic,
5683 	.read		= tracing_readme_read,
5684 	.llseek		= generic_file_llseek,
5685 };
5686 
5687 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5688 static union trace_eval_map_item *
5689 update_eval_map(union trace_eval_map_item *ptr)
5690 {
5691 	if (!ptr->map.eval_string) {
5692 		if (ptr->tail.next) {
5693 			ptr = ptr->tail.next;
5694 			/* Set ptr to the next real item (skip head) */
5695 			ptr++;
5696 		} else
5697 			return NULL;
5698 	}
5699 	return ptr;
5700 }
5701 
5702 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5703 {
5704 	union trace_eval_map_item *ptr = v;
5705 
5706 	/*
5707 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5708 	 * This really should never happen.
5709 	 */
5710 	(*pos)++;
5711 	ptr = update_eval_map(ptr);
5712 	if (WARN_ON_ONCE(!ptr))
5713 		return NULL;
5714 
5715 	ptr++;
5716 	ptr = update_eval_map(ptr);
5717 
5718 	return ptr;
5719 }
5720 
5721 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5722 {
5723 	union trace_eval_map_item *v;
5724 	loff_t l = 0;
5725 
5726 	mutex_lock(&trace_eval_mutex);
5727 
5728 	v = trace_eval_maps;
5729 	if (v)
5730 		v++;
5731 
5732 	while (v && l < *pos) {
5733 		v = eval_map_next(m, v, &l);
5734 	}
5735 
5736 	return v;
5737 }
5738 
5739 static void eval_map_stop(struct seq_file *m, void *v)
5740 {
5741 	mutex_unlock(&trace_eval_mutex);
5742 }
5743 
5744 static int eval_map_show(struct seq_file *m, void *v)
5745 {
5746 	union trace_eval_map_item *ptr = v;
5747 
5748 	seq_printf(m, "%s %ld (%s)\n",
5749 		   ptr->map.eval_string, ptr->map.eval_value,
5750 		   ptr->map.system);
5751 
5752 	return 0;
5753 }
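
/*
 * For reference, reading the eval_map file produces one line per mapping in
 * the format printed by eval_map_show() above, e.g. (illustrative values):
 *
 *   MY_ENUM_VAL 3 (my_subsystem)
 */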
5754 
5755 static const struct seq_operations tracing_eval_map_seq_ops = {
5756 	.start		= eval_map_start,
5757 	.next		= eval_map_next,
5758 	.stop		= eval_map_stop,
5759 	.show		= eval_map_show,
5760 };
5761 
5762 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5763 {
5764 	int ret;
5765 
5766 	ret = tracing_check_open_get_tr(NULL);
5767 	if (ret)
5768 		return ret;
5769 
5770 	return seq_open(filp, &tracing_eval_map_seq_ops);
5771 }
5772 
5773 static const struct file_operations tracing_eval_map_fops = {
5774 	.open		= tracing_eval_map_open,
5775 	.read		= seq_read,
5776 	.llseek		= seq_lseek,
5777 	.release	= seq_release,
5778 };
5779 
5780 static inline union trace_eval_map_item *
5781 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5782 {
5783 	/* Return tail of array given the head */
5784 	return ptr + ptr->head.length + 1;
5785 }
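
/*
 * Sketch of the array built by trace_insert_eval_map_file() below, which
 * trace_eval_jmp_to_tail() walks (N == head.length):
 *
 *   [ head | map[0] | map[1] | ... | map[N-1] | tail ]
 *     |                                         |
 *     head.mod, head.length                     tail.next -> next module's array
 *
 * Jumping to the tail therefore skips the head plus N map entries.
 */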
5786 
5787 static void
5788 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5789 			   int len)
5790 {
5791 	struct trace_eval_map **stop;
5792 	struct trace_eval_map **map;
5793 	union trace_eval_map_item *map_array;
5794 	union trace_eval_map_item *ptr;
5795 
5796 	stop = start + len;
5797 
5798 	/*
5799 	 * The trace_eval_maps contains the map plus a head and tail item,
5800 	 * where the head holds the module and length of array, and the
5801 	 * tail holds a pointer to the next list.
5802 	 */
5803 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5804 	if (!map_array) {
5805 		pr_warn("Unable to allocate trace eval mapping\n");
5806 		return;
5807 	}
5808 
5809 	guard(mutex)(&trace_eval_mutex);
5810 
5811 	if (!trace_eval_maps)
5812 		trace_eval_maps = map_array;
5813 	else {
5814 		ptr = trace_eval_maps;
5815 		for (;;) {
5816 			ptr = trace_eval_jmp_to_tail(ptr);
5817 			if (!ptr->tail.next)
5818 				break;
5819 			ptr = ptr->tail.next;
5820 
5821 		}
5822 		ptr->tail.next = map_array;
5823 	}
5824 	map_array->head.mod = mod;
5825 	map_array->head.length = len;
5826 	map_array++;
5827 
5828 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5829 		map_array->map = **map;
5830 		map_array++;
5831 	}
5832 	memset(map_array, 0, sizeof(*map_array));
5833 }
5834 
5835 static void trace_create_eval_file(struct dentry *d_tracer)
5836 {
5837 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5838 			  NULL, &tracing_eval_map_fops);
5839 }
5840 
5841 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5842 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5843 static inline void trace_insert_eval_map_file(struct module *mod,
5844 			      struct trace_eval_map **start, int len) { }
5845 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5846 
5847 static void trace_insert_eval_map(struct module *mod,
5848 				  struct trace_eval_map **start, int len)
5849 {
5850 	struct trace_eval_map **map;
5851 
5852 	if (len <= 0)
5853 		return;
5854 
5855 	map = start;
5856 
5857 	trace_event_eval_update(map, len);
5858 
5859 	trace_insert_eval_map_file(mod, start, len);
5860 }
5861 
5862 static ssize_t
5863 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5864 		       size_t cnt, loff_t *ppos)
5865 {
5866 	struct trace_array *tr = filp->private_data;
5867 	char buf[MAX_TRACER_SIZE+2];
5868 	int r;
5869 
5870 	mutex_lock(&trace_types_lock);
5871 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5872 	mutex_unlock(&trace_types_lock);
5873 
5874 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5875 }
5876 
5877 int tracer_init(struct tracer *t, struct trace_array *tr)
5878 {
5879 	tracing_reset_online_cpus(&tr->array_buffer);
5880 	return t->init(tr);
5881 }
5882 
5883 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5884 {
5885 	int cpu;
5886 
5887 	for_each_tracing_cpu(cpu)
5888 		per_cpu_ptr(buf->data, cpu)->entries = val;
5889 }
5890 
5891 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5892 {
5893 	if (cpu == RING_BUFFER_ALL_CPUS) {
5894 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5895 	} else {
5896 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5897 	}
5898 }
5899 
5900 #ifdef CONFIG_TRACER_MAX_TRACE
5901 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5902 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5903 					struct array_buffer *size_buf, int cpu_id)
5904 {
5905 	int cpu, ret = 0;
5906 
5907 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5908 		for_each_tracing_cpu(cpu) {
5909 			ret = ring_buffer_resize(trace_buf->buffer,
5910 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5911 			if (ret < 0)
5912 				break;
5913 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5914 				per_cpu_ptr(size_buf->data, cpu)->entries;
5915 		}
5916 	} else {
5917 		ret = ring_buffer_resize(trace_buf->buffer,
5918 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5919 		if (ret == 0)
5920 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5921 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5922 	}
5923 
5924 	return ret;
5925 }
5926 #endif /* CONFIG_TRACER_MAX_TRACE */
5927 
5928 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5929 					unsigned long size, int cpu)
5930 {
5931 	int ret;
5932 
5933 	/*
5934 	 * If kernel or user changes the size of the ring buffer
5935 	 * we use the size that was given, and we can forget about
5936 	 * expanding it later.
5937 	 */
5938 	trace_set_ring_buffer_expanded(tr);
5939 
5940 	/* May be called before buffers are initialized */
5941 	if (!tr->array_buffer.buffer)
5942 		return 0;
5943 
5944 	/* Do not allow tracing while resizing ring buffer */
5945 	tracing_stop_tr(tr);
5946 
5947 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5948 	if (ret < 0)
5949 		goto out_start;
5950 
5951 #ifdef CONFIG_TRACER_MAX_TRACE
5952 	if (!tr->allocated_snapshot)
5953 		goto out;
5954 
5955 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5956 	if (ret < 0) {
5957 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5958 						     &tr->array_buffer, cpu);
5959 		if (r < 0) {
5960 			/*
5961 			 * AARGH! We are left with a max buffer of a
5962 			 * different size than the main buffer!
5963 			 * The max buffer is our "snapshot" buffer.
5964 			 * When a tracer needs a snapshot (one of the
5965 			 * latency tracers), it swaps the max buffer
5966 			 * with the saved snapshot. We succeeded in
5967 			 * updating the size of the main buffer, but failed
5968 			 * to update the size of the max buffer. Then, when
5969 			 * we tried to reset the main buffer to its original
5970 			 * size, we failed there too. This is very unlikely
5971 			 * to happen, but if it does, warn and kill all
5972 			 * tracing.
5973 			 */
5974 			WARN_ON(1);
5975 			tracing_disabled = 1;
5976 		}
5977 		goto out_start;
5978 	}
5979 
5980 	update_buffer_entries(&tr->max_buffer, cpu);
5981 
5982  out:
5983 #endif /* CONFIG_TRACER_MAX_TRACE */
5984 
5985 	update_buffer_entries(&tr->array_buffer, cpu);
5986  out_start:
5987 	tracing_start_tr(tr);
5988 	return ret;
5989 }
5990 
5991 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5992 				  unsigned long size, int cpu_id)
5993 {
5994 	guard(mutex)(&trace_types_lock);
5995 
5996 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5997 		/* make sure, this cpu is enabled in the mask */
5998 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5999 			return -EINVAL;
6000 	}
6001 
6002 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6003 }
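
/*
 * For context, the resize path above backs the buffer_size_kb files written
 * from user space, e.g. (illustrative shell usage, default tracefs mount):
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb              # all CPUs
 *   echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb # one CPU
 */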
6004 
6005 struct trace_mod_entry {
6006 	unsigned long	mod_addr;
6007 	char		mod_name[MODULE_NAME_LEN];
6008 };
6009 
6010 struct trace_scratch {
6011 	unsigned long		text_addr;
6012 	unsigned long		nr_entries;
6013 	struct trace_mod_entry	entries[];
6014 };
6015 
6016 static DEFINE_MUTEX(scratch_mutex);
6017 
6018 static int cmp_mod_entry(const void *key, const void *pivot)
6019 {
6020 	unsigned long addr = (unsigned long)key;
6021 	const struct trace_mod_entry *ent = pivot;
6022 
6023 	if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6024 		return 0;
6025 	else
6026 		return addr - ent->mod_addr;
6027 }
6028 
6029 /**
6030  * trace_adjust_address() - Adjust prev boot address to current address.
6031  * @tr: Persistent ring buffer's trace_array.
6032  * @addr: Address in @tr which is adjusted.
6033  */
6034 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6035 {
6036 	struct trace_module_delta *module_delta;
6037 	struct trace_scratch *tscratch;
6038 	struct trace_mod_entry *entry;
6039 	int idx = 0, nr_entries;
6040 
6041 	/* If we don't have last boot delta, return the address */
6042 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6043 		return addr;
6044 
6045 	/* tr->module_delta must be protected by rcu. */
6046 	guard(rcu)();
6047 	tscratch = tr->scratch;
6048 	/* If there is no tscratch, module_delta must be NULL. */
6049 	module_delta = READ_ONCE(tr->module_delta);
6050 	if (!module_delta || tscratch->entries[0].mod_addr > addr)
6051 		return addr + tr->text_delta;
6052 
6053 	/* Note that entries must be sorted. */
6054 	nr_entries = tscratch->nr_entries;
6055 	if (nr_entries == 1 ||
6056 	    tscratch->entries[nr_entries - 1].mod_addr < addr)
6057 		idx = nr_entries - 1;
6058 	else {
6059 		entry = __inline_bsearch((void *)addr,
6060 				tscratch->entries,
6061 				nr_entries - 1,
6062 				sizeof(tscratch->entries[0]),
6063 				cmp_mod_entry);
6064 		if (entry)
6065 			idx = entry - tscratch->entries;
6066 	}
6067 
6068 	return addr + module_delta->delta[idx];
6069 }
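
/*
 * Worked example for trace_adjust_address(), with made-up addresses and
 * assuming delta[idx] holds the new load address minus the old one: if the
 * previous boot had a module's text at mod_addr 0xffffffffc0000000 and this
 * boot has it at 0xffffffffc0100000, delta[idx] is 0x100000, so a recorded
 * address of 0xffffffffc0000010 is reported as 0xffffffffc0100010.
 */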
6070 
6071 #ifdef CONFIG_MODULES
6072 static int save_mod(struct module *mod, void *data)
6073 {
6074 	struct trace_array *tr = data;
6075 	struct trace_scratch *tscratch;
6076 	struct trace_mod_entry *entry;
6077 	unsigned int size;
6078 
6079 	tscratch = tr->scratch;
6080 	if (!tscratch)
6081 		return -1;
6082 	size = tr->scratch_size;
6083 
6084 	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6085 		return -1;
6086 
6087 	entry = &tscratch->entries[tscratch->nr_entries];
6088 
6089 	tscratch->nr_entries++;
6090 
6091 	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6092 	strscpy(entry->mod_name, mod->name);
6093 
6094 	return 0;
6095 }
6096 #else
6097 static int save_mod(struct module *mod, void *data)
6098 {
6099 	return 0;
6100 }
6101 #endif
6102 
6103 static void update_last_data(struct trace_array *tr)
6104 {
6105 	struct trace_module_delta *module_delta;
6106 	struct trace_scratch *tscratch;
6107 
6108 	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6109 		return;
6110 
6111 	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6112 		return;
6113 
6114 	/* Only if the buffer has previous boot data, clear and update it. */
6115 	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6116 
6117 	/* Reset the module list and reload them */
6118 	if (tr->scratch) {
6119 		struct trace_scratch *tscratch = tr->scratch;
6120 
6121 		memset(tscratch->entries, 0,
6122 		       flex_array_size(tscratch, entries, tscratch->nr_entries));
6123 		tscratch->nr_entries = 0;
6124 
6125 		guard(mutex)(&scratch_mutex);
6126 		module_for_each_mod(save_mod, tr);
6127 	}
6128 
6129 	/*
6130 	 * Need to clear all CPU buffers as there cannot be events
6131 	 * from the previous boot mixed with events from this boot
6132 	 * as that will cause a confusing trace. Need to clear all
6133 	 * CPU buffers, even for those that may currently be offline.
6134 	 */
6135 	tracing_reset_all_cpus(&tr->array_buffer);
6136 
6137 	/* Using current data now */
6138 	tr->text_delta = 0;
6139 
6140 	if (!tr->scratch)
6141 		return;
6142 
6143 	tscratch = tr->scratch;
6144 	module_delta = READ_ONCE(tr->module_delta);
6145 	WRITE_ONCE(tr->module_delta, NULL);
6146 	kfree_rcu(module_delta, rcu);
6147 
6148 	/* Set the persistent ring buffer meta data to this address */
6149 	tscratch->text_addr = (unsigned long)_text;
6150 }
6151 
6152 /**
6153  * tracing_update_buffers - used by tracing facility to expand ring buffers
6154  * @tr: The tracing instance
6155  *
6156  * To save memory when tracing is never used on a system that has it
6157  * configured in, the ring buffers are initially set to a minimum size.
6158  * Once a user starts to use the tracing facility, the buffers need to
6159  * grow to their default size.
6160  *
6161  * This function is to be called when a tracer is about to be used.
6162  */
6163 int tracing_update_buffers(struct trace_array *tr)
6164 {
6165 	int ret = 0;
6166 
6167 	mutex_lock(&trace_types_lock);
6168 
6169 	update_last_data(tr);
6170 
6171 	if (!tr->ring_buffer_expanded)
6172 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6173 						RING_BUFFER_ALL_CPUS);
6174 	mutex_unlock(&trace_types_lock);
6175 
6176 	return ret;
6177 }
6178 
6179 struct trace_option_dentry;
6180 
6181 static void
6182 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6183 
6184 /*
6185  * Used to clear out the tracer before deletion of an instance.
6186  * Must have trace_types_lock held.
6187  */
6188 static void tracing_set_nop(struct trace_array *tr)
6189 {
6190 	if (tr->current_trace == &nop_trace)
6191 		return;
6192 
6193 	tr->current_trace->enabled--;
6194 
6195 	if (tr->current_trace->reset)
6196 		tr->current_trace->reset(tr);
6197 
6198 	tr->current_trace = &nop_trace;
6199 }
6200 
6201 static bool tracer_options_updated;
6202 
6203 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6204 {
6205 	/* Only enable if the directory has been created already. */
6206 	if (!tr->dir)
6207 		return;
6208 
6209 	/* Only create trace option files after update_tracer_options finishes */
6210 	if (!tracer_options_updated)
6211 		return;
6212 
6213 	create_trace_option_files(tr, t);
6214 }
6215 
6216 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6217 {
6218 	struct tracer *t;
6219 #ifdef CONFIG_TRACER_MAX_TRACE
6220 	bool had_max_tr;
6221 #endif
6222 	int ret;
6223 
6224 	guard(mutex)(&trace_types_lock);
6225 
6226 	update_last_data(tr);
6227 
6228 	if (!tr->ring_buffer_expanded) {
6229 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6230 						RING_BUFFER_ALL_CPUS);
6231 		if (ret < 0)
6232 			return ret;
6233 		ret = 0;
6234 	}
6235 
6236 	for (t = trace_types; t; t = t->next) {
6237 		if (strcmp(t->name, buf) == 0)
6238 			break;
6239 	}
6240 	if (!t)
6241 		return -EINVAL;
6242 
6243 	if (t == tr->current_trace)
6244 		return 0;
6245 
6246 #ifdef CONFIG_TRACER_SNAPSHOT
6247 	if (t->use_max_tr) {
6248 		local_irq_disable();
6249 		arch_spin_lock(&tr->max_lock);
6250 		ret = tr->cond_snapshot ? -EBUSY : 0;
6251 		arch_spin_unlock(&tr->max_lock);
6252 		local_irq_enable();
6253 		if (ret)
6254 			return ret;
6255 	}
6256 #endif
6257 	/* Some tracers won't work on kernel command line */
6258 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6259 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6260 			t->name);
6261 		return -EINVAL;
6262 	}
6263 
6264 	/* Some tracers are only allowed for the top level buffer */
6265 	if (!trace_ok_for_array(t, tr))
6266 		return -EINVAL;
6267 
6268 	/* If trace pipe files are being read, we can't change the tracer */
6269 	if (tr->trace_ref)
6270 		return -EBUSY;
6271 
6272 	trace_branch_disable();
6273 
6274 	tr->current_trace->enabled--;
6275 
6276 	if (tr->current_trace->reset)
6277 		tr->current_trace->reset(tr);
6278 
6279 #ifdef CONFIG_TRACER_MAX_TRACE
6280 	had_max_tr = tr->current_trace->use_max_tr;
6281 
6282 	/* Current trace needs to be nop_trace before synchronize_rcu */
6283 	tr->current_trace = &nop_trace;
6284 
6285 	if (had_max_tr && !t->use_max_tr) {
6286 		/*
6287 		 * We need to make sure that the update_max_tr sees that
6288 		 * current_trace changed to nop_trace to keep it from
6289 		 * swapping the buffers after we resize it.
6290 		 * The update_max_tr is called with interrupts disabled,
6291 		 * so a synchronize_rcu() is sufficient.
6292 		 */
6293 		synchronize_rcu();
6294 		free_snapshot(tr);
6295 		tracing_disarm_snapshot(tr);
6296 	}
6297 
6298 	if (!had_max_tr && t->use_max_tr) {
6299 		ret = tracing_arm_snapshot_locked(tr);
6300 		if (ret)
6301 			return ret;
6302 	}
6303 #else
6304 	tr->current_trace = &nop_trace;
6305 #endif
6306 
6307 	if (t->init) {
6308 		ret = tracer_init(t, tr);
6309 		if (ret) {
6310 #ifdef CONFIG_TRACER_MAX_TRACE
6311 			if (t->use_max_tr)
6312 				tracing_disarm_snapshot(tr);
6313 #endif
6314 			return ret;
6315 		}
6316 	}
6317 
6318 	tr->current_trace = t;
6319 	tr->current_trace->enabled++;
6320 	trace_branch_enable(tr);
6321 
6322 	return 0;
6323 }
6324 
6325 static ssize_t
6326 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6327 			size_t cnt, loff_t *ppos)
6328 {
6329 	struct trace_array *tr = filp->private_data;
6330 	char buf[MAX_TRACER_SIZE+1];
6331 	char *name;
6332 	size_t ret;
6333 	int err;
6334 
6335 	ret = cnt;
6336 
6337 	if (cnt > MAX_TRACER_SIZE)
6338 		cnt = MAX_TRACER_SIZE;
6339 
6340 	if (copy_from_user(buf, ubuf, cnt))
6341 		return -EFAULT;
6342 
6343 	buf[cnt] = 0;
6344 
6345 	name = strim(buf);
6346 
6347 	err = tracing_set_tracer(tr, name);
6348 	if (err)
6349 		return err;
6350 
6351 	*ppos += ret;
6352 
6353 	return ret;
6354 }
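
/*
 * Illustrative shell usage of the current_tracer file serviced by
 * tracing_set_trace_read()/tracing_set_trace_write() above (default tracefs
 * mount assumed):
 *
 *   cat /sys/kernel/tracing/current_tracer      # e.g. prints "nop"
 *   echo function > /sys/kernel/tracing/current_tracer
 */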
6355 
6356 static ssize_t
6357 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6358 		   size_t cnt, loff_t *ppos)
6359 {
6360 	char buf[64];
6361 	int r;
6362 
6363 	r = snprintf(buf, sizeof(buf), "%ld\n",
6364 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6365 	if (r > sizeof(buf))
6366 		r = sizeof(buf);
6367 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6368 }
6369 
6370 static ssize_t
6371 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6372 		    size_t cnt, loff_t *ppos)
6373 {
6374 	unsigned long val;
6375 	int ret;
6376 
6377 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6378 	if (ret)
6379 		return ret;
6380 
6381 	*ptr = val * 1000;
6382 
6383 	return cnt;
6384 }
6385 
6386 static ssize_t
6387 tracing_thresh_read(struct file *filp, char __user *ubuf,
6388 		    size_t cnt, loff_t *ppos)
6389 {
6390 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6391 }
6392 
6393 static ssize_t
6394 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6395 		     size_t cnt, loff_t *ppos)
6396 {
6397 	struct trace_array *tr = filp->private_data;
6398 	int ret;
6399 
6400 	guard(mutex)(&trace_types_lock);
6401 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6402 	if (ret < 0)
6403 		return ret;
6404 
6405 	if (tr->current_trace->update_thresh) {
6406 		ret = tr->current_trace->update_thresh(tr);
6407 		if (ret < 0)
6408 			return ret;
6409 	}
6410 
6411 	return cnt;
6412 }
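
/*
 * Note on units for the helpers above: tracing_thresh is read and written
 * in microseconds but stored in nanoseconds, e.g. (illustrative):
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh   # stores 100000 ns
 */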
6413 
6414 #ifdef CONFIG_TRACER_MAX_TRACE
6415 
6416 static ssize_t
6417 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6418 		     size_t cnt, loff_t *ppos)
6419 {
6420 	struct trace_array *tr = filp->private_data;
6421 
6422 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6423 }
6424 
6425 static ssize_t
6426 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6427 		      size_t cnt, loff_t *ppos)
6428 {
6429 	struct trace_array *tr = filp->private_data;
6430 
6431 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6432 }
6433 
6434 #endif
6435 
6436 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6437 {
6438 	if (cpu == RING_BUFFER_ALL_CPUS) {
6439 		if (cpumask_empty(tr->pipe_cpumask)) {
6440 			cpumask_setall(tr->pipe_cpumask);
6441 			return 0;
6442 		}
6443 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6444 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6445 		return 0;
6446 	}
6447 	return -EBUSY;
6448 }
6449 
6450 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6451 {
6452 	if (cpu == RING_BUFFER_ALL_CPUS) {
6453 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6454 		cpumask_clear(tr->pipe_cpumask);
6455 	} else {
6456 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6457 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6458 	}
6459 }
6460 
6461 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6462 {
6463 	struct trace_array *tr = inode->i_private;
6464 	struct trace_iterator *iter;
6465 	int cpu;
6466 	int ret;
6467 
6468 	ret = tracing_check_open_get_tr(tr);
6469 	if (ret)
6470 		return ret;
6471 
6472 	mutex_lock(&trace_types_lock);
6473 	cpu = tracing_get_cpu(inode);
6474 	ret = open_pipe_on_cpu(tr, cpu);
6475 	if (ret)
6476 		goto fail_pipe_on_cpu;
6477 
6478 	/* create a buffer to store the information to pass to userspace */
6479 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6480 	if (!iter) {
6481 		ret = -ENOMEM;
6482 		goto fail_alloc_iter;
6483 	}
6484 
6485 	trace_seq_init(&iter->seq);
6486 	iter->trace = tr->current_trace;
6487 
6488 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6489 		ret = -ENOMEM;
6490 		goto fail;
6491 	}
6492 
6493 	/* trace pipe does not show start of buffer */
6494 	cpumask_setall(iter->started);
6495 
6496 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6497 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6498 
6499 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6500 	if (trace_clocks[tr->clock_id].in_ns)
6501 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6502 
6503 	iter->tr = tr;
6504 	iter->array_buffer = &tr->array_buffer;
6505 	iter->cpu_file = cpu;
6506 	mutex_init(&iter->mutex);
6507 	filp->private_data = iter;
6508 
6509 	if (iter->trace->pipe_open)
6510 		iter->trace->pipe_open(iter);
6511 
6512 	nonseekable_open(inode, filp);
6513 
6514 	tr->trace_ref++;
6515 
6516 	mutex_unlock(&trace_types_lock);
6517 	return ret;
6518 
6519 fail:
6520 	kfree(iter);
6521 fail_alloc_iter:
6522 	close_pipe_on_cpu(tr, cpu);
6523 fail_pipe_on_cpu:
6524 	__trace_array_put(tr);
6525 	mutex_unlock(&trace_types_lock);
6526 	return ret;
6527 }
6528 
6529 static int tracing_release_pipe(struct inode *inode, struct file *file)
6530 {
6531 	struct trace_iterator *iter = file->private_data;
6532 	struct trace_array *tr = inode->i_private;
6533 
6534 	mutex_lock(&trace_types_lock);
6535 
6536 	tr->trace_ref--;
6537 
6538 	if (iter->trace->pipe_close)
6539 		iter->trace->pipe_close(iter);
6540 	close_pipe_on_cpu(tr, iter->cpu_file);
6541 	mutex_unlock(&trace_types_lock);
6542 
6543 	free_trace_iter_content(iter);
6544 	kfree(iter);
6545 
6546 	trace_array_put(tr);
6547 
6548 	return 0;
6549 }
6550 
6551 static __poll_t
6552 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6553 {
6554 	struct trace_array *tr = iter->tr;
6555 
6556 	/* Iterators are static, they should be filled or empty */
6557 	if (trace_buffer_iter(iter, iter->cpu_file))
6558 		return EPOLLIN | EPOLLRDNORM;
6559 
6560 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6561 		/*
6562 		 * Always select as readable when in blocking mode
6563 		 */
6564 		return EPOLLIN | EPOLLRDNORM;
6565 	else
6566 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6567 					     filp, poll_table, iter->tr->buffer_percent);
6568 }
6569 
6570 static __poll_t
6571 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6572 {
6573 	struct trace_iterator *iter = filp->private_data;
6574 
6575 	return trace_poll(iter, filp, poll_table);
6576 }
6577 
6578 /* Must be called with iter->mutex held. */
6579 static int tracing_wait_pipe(struct file *filp)
6580 {
6581 	struct trace_iterator *iter = filp->private_data;
6582 	int ret;
6583 
6584 	while (trace_empty(iter)) {
6585 
6586 		if ((filp->f_flags & O_NONBLOCK)) {
6587 			return -EAGAIN;
6588 		}
6589 
6590 		/*
6591 		 * We block until we read something or tracing is disabled.
6592 		 * We still block if tracing is disabled, but we have never
6593 		 * read anything. This allows a user to cat this file, and
6594 		 * then enable tracing. But after we have read something,
6595 		 * we give an EOF when tracing is again disabled.
6596 		 *
6597 		 * iter->pos will be 0 if we haven't read anything.
6598 		 */
6599 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6600 			break;
6601 
6602 		mutex_unlock(&iter->mutex);
6603 
6604 		ret = wait_on_pipe(iter, 0);
6605 
6606 		mutex_lock(&iter->mutex);
6607 
6608 		if (ret)
6609 			return ret;
6610 	}
6611 
6612 	return 1;
6613 }
6614 
6615 /*
6616  * Consumer reader.
6617  */
6618 static ssize_t
6619 tracing_read_pipe(struct file *filp, char __user *ubuf,
6620 		  size_t cnt, loff_t *ppos)
6621 {
6622 	struct trace_iterator *iter = filp->private_data;
6623 	ssize_t sret;
6624 
6625 	/*
6626 	 * Avoid more than one consumer on a single file descriptor
6627 	 * This is just a matter of trace coherency; the ring buffer itself
6628 	 * is protected.
6629 	 */
6630 	guard(mutex)(&iter->mutex);
6631 
6632 	/* return any leftover data */
6633 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6634 	if (sret != -EBUSY)
6635 		return sret;
6636 
6637 	trace_seq_init(&iter->seq);
6638 
6639 	if (iter->trace->read) {
6640 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6641 		if (sret)
6642 			return sret;
6643 	}
6644 
6645 waitagain:
6646 	sret = tracing_wait_pipe(filp);
6647 	if (sret <= 0)
6648 		return sret;
6649 
6650 	/* stop when tracing is finished */
6651 	if (trace_empty(iter))
6652 		return 0;
6653 
6654 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6655 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6656 
6657 	/* reset all but tr, trace, and overruns */
6658 	trace_iterator_reset(iter);
6659 	cpumask_clear(iter->started);
6660 	trace_seq_init(&iter->seq);
6661 
6662 	trace_event_read_lock();
6663 	trace_access_lock(iter->cpu_file);
6664 	while (trace_find_next_entry_inc(iter) != NULL) {
6665 		enum print_line_t ret;
6666 		int save_len = iter->seq.seq.len;
6667 
6668 		ret = print_trace_line(iter);
6669 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6670 			/*
6671 			 * If one print_trace_line() fills the entire trace_seq in one
6672 			 * shot, trace_seq_to_user() will return -EBUSY because save_len
6673 			 * is 0. In this case we need to consume the event, otherwise the
6674 			 * loop will peek it again next time, causing an infinite loop.
6675 			 */
6676 			if (save_len == 0) {
6677 				iter->seq.full = 0;
6678 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6679 				trace_consume(iter);
6680 				break;
6681 			}
6682 
6683 			/* In other cases, don't print partial lines */
6684 			iter->seq.seq.len = save_len;
6685 			break;
6686 		}
6687 		if (ret != TRACE_TYPE_NO_CONSUME)
6688 			trace_consume(iter);
6689 
6690 		if (trace_seq_used(&iter->seq) >= cnt)
6691 			break;
6692 
6693 		/*
6694 		 * Setting the full flag means we reached the trace_seq buffer
6695 		 * size and we should have left via the partial output condition above.
6696 		 * One of the trace_seq_* functions is not being used properly.
6697 		 */
6698 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6699 			  iter->ent->type);
6700 	}
6701 	trace_access_unlock(iter->cpu_file);
6702 	trace_event_read_unlock();
6703 
6704 	/* Now copy what we have to the user */
6705 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6706 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6707 		trace_seq_init(&iter->seq);
6708 
6709 	/*
6710 	 * If there was nothing to send to user, in spite of consuming trace
6711 	 * entries, go back to wait for more entries.
6712 	 */
6713 	if (sret == -EBUSY)
6714 		goto waitagain;
6715 
6716 	return sret;
6717 }
6718 
6719 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6720 				     unsigned int idx)
6721 {
6722 	__free_page(spd->pages[idx]);
6723 }
6724 
6725 static size_t
6726 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6727 {
6728 	size_t count;
6729 	int save_len;
6730 	int ret;
6731 
6732 	/* Seq buffer is page-sized, exactly what we need. */
6733 	for (;;) {
6734 		save_len = iter->seq.seq.len;
6735 		ret = print_trace_line(iter);
6736 
6737 		if (trace_seq_has_overflowed(&iter->seq)) {
6738 			iter->seq.seq.len = save_len;
6739 			break;
6740 		}
6741 
6742 		/*
6743 		 * This should not be hit, because it should only
6744 		 * be set if the iter->seq overflowed. But check it
6745 		 * anyway to be safe.
6746 		 */
6747 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6748 			iter->seq.seq.len = save_len;
6749 			break;
6750 		}
6751 
6752 		count = trace_seq_used(&iter->seq) - save_len;
6753 		if (rem < count) {
6754 			rem = 0;
6755 			iter->seq.seq.len = save_len;
6756 			break;
6757 		}
6758 
6759 		if (ret != TRACE_TYPE_NO_CONSUME)
6760 			trace_consume(iter);
6761 		rem -= count;
6762 		if (!trace_find_next_entry_inc(iter))	{
6763 			rem = 0;
6764 			iter->ent = NULL;
6765 			break;
6766 		}
6767 	}
6768 
6769 	return rem;
6770 }
6771 
6772 static ssize_t tracing_splice_read_pipe(struct file *filp,
6773 					loff_t *ppos,
6774 					struct pipe_inode_info *pipe,
6775 					size_t len,
6776 					unsigned int flags)
6777 {
6778 	struct page *pages_def[PIPE_DEF_BUFFERS];
6779 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6780 	struct trace_iterator *iter = filp->private_data;
6781 	struct splice_pipe_desc spd = {
6782 		.pages		= pages_def,
6783 		.partial	= partial_def,
6784 		.nr_pages	= 0, /* This gets updated below. */
6785 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6786 		.ops		= &default_pipe_buf_ops,
6787 		.spd_release	= tracing_spd_release_pipe,
6788 	};
6789 	ssize_t ret;
6790 	size_t rem;
6791 	unsigned int i;
6792 
6793 	if (splice_grow_spd(pipe, &spd))
6794 		return -ENOMEM;
6795 
6796 	mutex_lock(&iter->mutex);
6797 
6798 	if (iter->trace->splice_read) {
6799 		ret = iter->trace->splice_read(iter, filp,
6800 					       ppos, pipe, len, flags);
6801 		if (ret)
6802 			goto out_err;
6803 	}
6804 
6805 	ret = tracing_wait_pipe(filp);
6806 	if (ret <= 0)
6807 		goto out_err;
6808 
6809 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6810 		ret = -EFAULT;
6811 		goto out_err;
6812 	}
6813 
6814 	trace_event_read_lock();
6815 	trace_access_lock(iter->cpu_file);
6816 
6817 	/* Fill as many pages as possible. */
6818 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6819 		spd.pages[i] = alloc_page(GFP_KERNEL);
6820 		if (!spd.pages[i])
6821 			break;
6822 
6823 		rem = tracing_fill_pipe_page(rem, iter);
6824 
6825 		/* Copy the data into the page, so we can start over. */
6826 		ret = trace_seq_to_buffer(&iter->seq,
6827 					  page_address(spd.pages[i]),
6828 					  trace_seq_used(&iter->seq));
6829 		if (ret < 0) {
6830 			__free_page(spd.pages[i]);
6831 			break;
6832 		}
6833 		spd.partial[i].offset = 0;
6834 		spd.partial[i].len = trace_seq_used(&iter->seq);
6835 
6836 		trace_seq_init(&iter->seq);
6837 	}
6838 
6839 	trace_access_unlock(iter->cpu_file);
6840 	trace_event_read_unlock();
6841 	mutex_unlock(&iter->mutex);
6842 
6843 	spd.nr_pages = i;
6844 
6845 	if (i)
6846 		ret = splice_to_pipe(pipe, &spd);
6847 	else
6848 		ret = 0;
6849 out:
6850 	splice_shrink_spd(&spd);
6851 	return ret;
6852 
6853 out_err:
6854 	mutex_unlock(&iter->mutex);
6855 	goto out;
6856 }
6857 
6858 static ssize_t
6859 tracing_entries_read(struct file *filp, char __user *ubuf,
6860 		     size_t cnt, loff_t *ppos)
6861 {
6862 	struct inode *inode = file_inode(filp);
6863 	struct trace_array *tr = inode->i_private;
6864 	int cpu = tracing_get_cpu(inode);
6865 	char buf[64];
6866 	int r = 0;
6867 	ssize_t ret;
6868 
6869 	mutex_lock(&trace_types_lock);
6870 
6871 	if (cpu == RING_BUFFER_ALL_CPUS) {
6872 		int cpu, buf_size_same;
6873 		unsigned long size;
6874 
6875 		size = 0;
6876 		buf_size_same = 1;
6877 		/* check if all cpu sizes are same */
6878 		for_each_tracing_cpu(cpu) {
6879 			/* fill in the size from first enabled cpu */
6880 			if (size == 0)
6881 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6882 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6883 				buf_size_same = 0;
6884 				break;
6885 			}
6886 		}
6887 
6888 		if (buf_size_same) {
6889 			if (!tr->ring_buffer_expanded)
6890 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6891 					    size >> 10,
6892 					    trace_buf_size >> 10);
6893 			else
6894 				r = sprintf(buf, "%lu\n", size >> 10);
6895 		} else
6896 			r = sprintf(buf, "X\n");
6897 	} else
6898 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6899 
6900 	mutex_unlock(&trace_types_lock);
6901 
6902 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6903 	return ret;
6904 }
6905 
6906 static ssize_t
6907 tracing_entries_write(struct file *filp, const char __user *ubuf,
6908 		      size_t cnt, loff_t *ppos)
6909 {
6910 	struct inode *inode = file_inode(filp);
6911 	struct trace_array *tr = inode->i_private;
6912 	unsigned long val;
6913 	int ret;
6914 
6915 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6916 	if (ret)
6917 		return ret;
6918 
6919 	/* must have at least 1 entry */
6920 	if (!val)
6921 		return -EINVAL;
6922 
6923 	/* value is in KB */
6924 	val <<= 10;
6925 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6926 	if (ret < 0)
6927 		return ret;
6928 
6929 	*ppos += cnt;
6930 
6931 	return cnt;
6932 }
6933 
6934 static ssize_t
6935 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6936 				size_t cnt, loff_t *ppos)
6937 {
6938 	struct trace_array *tr = filp->private_data;
6939 	char buf[64];
6940 	int r, cpu;
6941 	unsigned long size = 0, expanded_size = 0;
6942 
6943 	mutex_lock(&trace_types_lock);
6944 	for_each_tracing_cpu(cpu) {
6945 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6946 		if (!tr->ring_buffer_expanded)
6947 			expanded_size += trace_buf_size >> 10;
6948 	}
6949 	if (tr->ring_buffer_expanded)
6950 		r = sprintf(buf, "%lu\n", size);
6951 	else
6952 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6953 	mutex_unlock(&trace_types_lock);
6954 
6955 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6956 }
6957 
6958 #define LAST_BOOT_HEADER ((void *)1)
6959 
6960 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
6961 {
6962 	struct trace_array *tr = m->private;
6963 	struct trace_scratch *tscratch = tr->scratch;
6964 	unsigned int index = *pos;
6965 
6966 	(*pos)++;
6967 
6968 	if (*pos == 1)
6969 		return LAST_BOOT_HEADER;
6970 
6971 	/* Only show offsets of the last boot data */
6972 	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6973 		return NULL;
6974 
6975 	/* *pos 0 is for the header, 1 is for the first module */
6976 	index--;
6977 
6978 	if (index >= tscratch->nr_entries)
6979 		return NULL;
6980 
6981 	return &tscratch->entries[index];
6982 }
6983 
6984 static void *l_start(struct seq_file *m, loff_t *pos)
6985 {
6986 	mutex_lock(&scratch_mutex);
6987 
6988 	return l_next(m, NULL, pos);
6989 }
6990 
6991 static void l_stop(struct seq_file *m, void *p)
6992 {
6993 	mutex_unlock(&scratch_mutex);
6994 }
6995 
6996 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6997 {
6998 	struct trace_scratch *tscratch = tr->scratch;
6999 
7000 	/*
7001 	 * Do not leak KASLR address. This only shows the KASLR address of
7002 	 * the last boot. When the ring buffer is started, the LAST_BOOT
7003 	 * flag gets cleared, and this should only report "current".
7004 	 * Otherwise it shows the KASLR address from the previous boot which
7005 	 * should not be the same as the current boot.
7006 	 */
7007 	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7008 		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7009 	else
7010 		seq_puts(m, "# Current\n");
7011 }
7012 
7013 static int l_show(struct seq_file *m, void *v)
7014 {
7015 	struct trace_array *tr = m->private;
7016 	struct trace_mod_entry *entry = v;
7017 
7018 	if (v == LAST_BOOT_HEADER) {
7019 		show_last_boot_header(m, tr);
7020 		return 0;
7021 	}
7022 
7023 	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7024 	return 0;
7025 }
7026 
7027 static const struct seq_operations last_boot_seq_ops = {
7028 	.start		= l_start,
7029 	.next		= l_next,
7030 	.stop		= l_stop,
7031 	.show		= l_show,
7032 };
7033 
7034 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7035 {
7036 	struct trace_array *tr = inode->i_private;
7037 	struct seq_file *m;
7038 	int ret;
7039 
7040 	ret = tracing_check_open_get_tr(tr);
7041 	if (ret)
7042 		return ret;
7043 
7044 	ret = seq_open(file, &last_boot_seq_ops);
7045 	if (ret) {
7046 		trace_array_put(tr);
7047 		return ret;
7048 	}
7049 
7050 	m = file->private_data;
7051 	m->private = tr;
7052 
7053 	return 0;
7054 }
7055 
7056 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7057 {
7058 	struct trace_array *tr = inode->i_private;
7059 	int cpu = tracing_get_cpu(inode);
7060 	int ret;
7061 
7062 	ret = tracing_check_open_get_tr(tr);
7063 	if (ret)
7064 		return ret;
7065 
7066 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7067 	if (ret < 0)
7068 		__trace_array_put(tr);
7069 	return ret;
7070 }
7071 
7072 static ssize_t
7073 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7074 			  size_t cnt, loff_t *ppos)
7075 {
7076 	/*
7077 	 * There is no need to read what the user has written; this function
7078 	 * just makes sure that there is no error when "echo" is used.
7079 	 */
7080 
7081 	*ppos += cnt;
7082 
7083 	return cnt;
7084 }
7085 
7086 static int
7087 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7088 {
7089 	struct trace_array *tr = inode->i_private;
7090 
7091 	/* disable tracing ? */
7092 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7093 		tracer_tracing_off(tr);
7094 	/* resize the ring buffer to 0 */
7095 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7096 
7097 	trace_array_put(tr);
7098 
7099 	return 0;
7100 }
7101 
7102 #define TRACE_MARKER_MAX_SIZE		4096
7103 
7104 static ssize_t
7105 tracing_mark_write(struct file *filp, const char __user *ubuf,
7106 					size_t cnt, loff_t *fpos)
7107 {
7108 	struct trace_array *tr = filp->private_data;
7109 	struct ring_buffer_event *event;
7110 	enum event_trigger_type tt = ETT_NONE;
7111 	struct trace_buffer *buffer;
7112 	struct print_entry *entry;
7113 	int meta_size;
7114 	ssize_t written;
7115 	size_t size;
7116 	int len;
7117 
7118 /* Used in tracing_mark_raw_write() as well */
7119 #define FAULTED_STR "<faulted>"
7120 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7121 
7122 	if (tracing_disabled)
7123 		return -EINVAL;
7124 
7125 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7126 		return -EINVAL;
7127 
7128 	if ((ssize_t)cnt < 0)
7129 		return -EINVAL;
7130 
7131 	if (cnt > TRACE_MARKER_MAX_SIZE)
7132 		cnt = TRACE_MARKER_MAX_SIZE;
7133 
7134 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7135  again:
7136 	size = cnt + meta_size;
7137 
7138 	/* If less than "<faulted>", then make sure we can still add that */
7139 	if (cnt < FAULTED_SIZE)
7140 		size += FAULTED_SIZE - cnt;
7141 
7142 	buffer = tr->array_buffer.buffer;
7143 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7144 					    tracing_gen_ctx());
7145 	if (unlikely(!event)) {
7146 		/*
7147 		 * If the size was greater than what was allowed, then
7148 		 * make it smaller and try again.
7149 		 */
7150 		if (size > ring_buffer_max_event_size(buffer)) {
7151 			/* cnt < FAULTED_SIZE should never make the size bigger than max */
7152 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7153 				return -EBADF;
7154 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7155 			/* The above should only happen once */
7156 			if (WARN_ON_ONCE(cnt + meta_size == size))
7157 				return -EBADF;
7158 			goto again;
7159 		}
7160 
7161 		/* Ring buffer disabled, return as if not open for write */
7162 		return -EBADF;
7163 	}
7164 
7165 	entry = ring_buffer_event_data(event);
7166 	entry->ip = _THIS_IP_;
7167 
7168 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7169 	if (len) {
7170 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7171 		cnt = FAULTED_SIZE;
7172 		written = -EFAULT;
7173 	} else
7174 		written = cnt;
7175 
7176 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7177 		/* do not add \n before testing triggers, but add \0 */
7178 		entry->buf[cnt] = '\0';
7179 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7180 	}
7181 
7182 	if (entry->buf[cnt - 1] != '\n') {
7183 		entry->buf[cnt] = '\n';
7184 		entry->buf[cnt + 1] = '\0';
7185 	} else
7186 		entry->buf[cnt] = '\0';
7187 
7188 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7189 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7190 	__buffer_unlock_commit(buffer, event);
7191 
7192 	if (tt)
7193 		event_triggers_post_call(tr->trace_marker_file, tt);
7194 
7195 	return written;
7196 }
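
/*
 * Illustrative usage of the trace_marker file handled above; writes larger
 * than TRACE_MARKER_MAX_SIZE are truncated and a trailing newline is added
 * if missing:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */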
7197 
7198 static ssize_t
7199 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7200 					size_t cnt, loff_t *fpos)
7201 {
7202 	struct trace_array *tr = filp->private_data;
7203 	struct ring_buffer_event *event;
7204 	struct trace_buffer *buffer;
7205 	struct raw_data_entry *entry;
7206 	ssize_t written;
7207 	int size;
7208 	int len;
7209 
7210 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7211 
7212 	if (tracing_disabled)
7213 		return -EINVAL;
7214 
7215 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7216 		return -EINVAL;
7217 
7218 	/* The marker must at least have a tag id */
7219 	if (cnt < sizeof(unsigned int))
7220 		return -EINVAL;
7221 
7222 	size = sizeof(*entry) + cnt;
7223 	if (cnt < FAULT_SIZE_ID)
7224 		size += FAULT_SIZE_ID - cnt;
7225 
7226 	buffer = tr->array_buffer.buffer;
7227 
7228 	if (size > ring_buffer_max_event_size(buffer))
7229 		return -EINVAL;
7230 
7231 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7232 					    tracing_gen_ctx());
7233 	if (!event)
7234 		/* Ring buffer disabled, return as if not open for write */
7235 		return -EBADF;
7236 
7237 	entry = ring_buffer_event_data(event);
7238 
7239 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7240 	if (len) {
7241 		entry->id = -1;
7242 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7243 		written = -EFAULT;
7244 	} else
7245 		written = cnt;
7246 
7247 	__buffer_unlock_commit(buffer, event);
7248 
7249 	return written;
7250 }
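
/*
 * Minimal (untested) C sketch for the trace_marker_raw file handled above.
 * The payload must start with an integer tag id; the struct layout here is
 * only an example:
 *
 *   struct { int id; char data[8]; } rec = { .id = 42, .data = "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *   if (fd >= 0) {
 *           write(fd, &rec, sizeof(rec));   // first sizeof(int) bytes become entry->id
 *           close(fd);
 *   }
 */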
7251 
7252 static int tracing_clock_show(struct seq_file *m, void *v)
7253 {
7254 	struct trace_array *tr = m->private;
7255 	int i;
7256 
7257 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7258 		seq_printf(m,
7259 			"%s%s%s%s", i ? " " : "",
7260 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7261 			i == tr->clock_id ? "]" : "");
7262 	seq_putc(m, '\n');
7263 
7264 	return 0;
7265 }
7266 
7267 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7268 {
7269 	int i;
7270 
7271 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7272 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7273 			break;
7274 	}
7275 	if (i == ARRAY_SIZE(trace_clocks))
7276 		return -EINVAL;
7277 
7278 	mutex_lock(&trace_types_lock);
7279 
7280 	tr->clock_id = i;
7281 
7282 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7283 
7284 	/*
7285 	 * New clock may not be consistent with the previous clock.
7286 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7287 	 */
7288 	tracing_reset_online_cpus(&tr->array_buffer);
7289 
7290 #ifdef CONFIG_TRACER_MAX_TRACE
7291 	if (tr->max_buffer.buffer)
7292 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7293 	tracing_reset_online_cpus(&tr->max_buffer);
7294 #endif
7295 
7296 	mutex_unlock(&trace_types_lock);
7297 
7298 	return 0;
7299 }
7300 
7301 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7302 				   size_t cnt, loff_t *fpos)
7303 {
7304 	struct seq_file *m = filp->private_data;
7305 	struct trace_array *tr = m->private;
7306 	char buf[64];
7307 	const char *clockstr;
7308 	int ret;
7309 
7310 	if (cnt >= sizeof(buf))
7311 		return -EINVAL;
7312 
7313 	if (copy_from_user(buf, ubuf, cnt))
7314 		return -EFAULT;
7315 
7316 	buf[cnt] = 0;
7317 
7318 	clockstr = strstrip(buf);
7319 
7320 	ret = tracing_set_clock(tr, clockstr);
7321 	if (ret)
7322 		return ret;
7323 
7324 	*fpos += cnt;
7325 
7326 	return cnt;
7327 }
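
/*
 * Illustrative usage of the trace_clock file backed by the show/write
 * handlers above (the current clock is shown in brackets):
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo global > /sys/kernel/tracing/trace_clock
 */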
7328 
7329 static int tracing_clock_open(struct inode *inode, struct file *file)
7330 {
7331 	struct trace_array *tr = inode->i_private;
7332 	int ret;
7333 
7334 	ret = tracing_check_open_get_tr(tr);
7335 	if (ret)
7336 		return ret;
7337 
7338 	ret = single_open(file, tracing_clock_show, inode->i_private);
7339 	if (ret < 0)
7340 		trace_array_put(tr);
7341 
7342 	return ret;
7343 }
7344 
7345 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7346 {
7347 	struct trace_array *tr = m->private;
7348 
7349 	mutex_lock(&trace_types_lock);
7350 
7351 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7352 		seq_puts(m, "delta [absolute]\n");
7353 	else
7354 		seq_puts(m, "[delta] absolute\n");
7355 
7356 	mutex_unlock(&trace_types_lock);
7357 
7358 	return 0;
7359 }
7360 
7361 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7362 {
7363 	struct trace_array *tr = inode->i_private;
7364 	int ret;
7365 
7366 	ret = tracing_check_open_get_tr(tr);
7367 	if (ret)
7368 		return ret;
7369 
7370 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7371 	if (ret < 0)
7372 		trace_array_put(tr);
7373 
7374 	return ret;
7375 }
7376 
7377 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7378 {
7379 	if (rbe == this_cpu_read(trace_buffered_event))
7380 		return ring_buffer_time_stamp(buffer);
7381 
7382 	return ring_buffer_event_time_stamp(buffer, rbe);
7383 }
7384 
7385 /*
7386  * Set or disable using the per CPU trace_buffered_event when possible.
7387  */
7388 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7389 {
7390 	guard(mutex)(&trace_types_lock);
7391 
7392 	if (set && tr->no_filter_buffering_ref++)
7393 		return 0;
7394 
7395 	if (!set) {
7396 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7397 			return -EINVAL;
7398 
7399 		--tr->no_filter_buffering_ref;
7400 	}
7401 
7402 	return 0;
7403 }
7404 
7405 struct ftrace_buffer_info {
7406 	struct trace_iterator	iter;
7407 	void			*spare;
7408 	unsigned int		spare_cpu;
7409 	unsigned int		spare_size;
7410 	unsigned int		read;
7411 };
7412 
7413 #ifdef CONFIG_TRACER_SNAPSHOT
7414 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7415 {
7416 	struct trace_array *tr = inode->i_private;
7417 	struct trace_iterator *iter;
7418 	struct seq_file *m;
7419 	int ret;
7420 
7421 	ret = tracing_check_open_get_tr(tr);
7422 	if (ret)
7423 		return ret;
7424 
7425 	if (file->f_mode & FMODE_READ) {
7426 		iter = __tracing_open(inode, file, true);
7427 		if (IS_ERR(iter))
7428 			ret = PTR_ERR(iter);
7429 	} else {
7430 		/* Writes still need the seq_file to hold the private data */
7431 		ret = -ENOMEM;
7432 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7433 		if (!m)
7434 			goto out;
7435 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7436 		if (!iter) {
7437 			kfree(m);
7438 			goto out;
7439 		}
7440 		ret = 0;
7441 
7442 		iter->tr = tr;
7443 		iter->array_buffer = &tr->max_buffer;
7444 		iter->cpu_file = tracing_get_cpu(inode);
7445 		m->private = iter;
7446 		file->private_data = m;
7447 	}
7448 out:
7449 	if (ret < 0)
7450 		trace_array_put(tr);
7451 
7452 	return ret;
7453 }
7454 
7455 static void tracing_swap_cpu_buffer(void *tr)
7456 {
7457 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7458 }
7459 
7460 static ssize_t
7461 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7462 		       loff_t *ppos)
7463 {
7464 	struct seq_file *m = filp->private_data;
7465 	struct trace_iterator *iter = m->private;
7466 	struct trace_array *tr = iter->tr;
7467 	unsigned long val;
7468 	int ret;
7469 
7470 	ret = tracing_update_buffers(tr);
7471 	if (ret < 0)
7472 		return ret;
7473 
7474 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7475 	if (ret)
7476 		return ret;
7477 
7478 	guard(mutex)(&trace_types_lock);
7479 
7480 	if (tr->current_trace->use_max_tr)
7481 		return -EBUSY;
7482 
7483 	local_irq_disable();
7484 	arch_spin_lock(&tr->max_lock);
7485 	if (tr->cond_snapshot)
7486 		ret = -EBUSY;
7487 	arch_spin_unlock(&tr->max_lock);
7488 	local_irq_enable();
7489 	if (ret)
7490 		return ret;
7491 
7492 	switch (val) {
7493 	case 0:
7494 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7495 			return -EINVAL;
7496 		if (tr->allocated_snapshot)
7497 			free_snapshot(tr);
7498 		break;
7499 	case 1:
7500 /* Only allow per-cpu swap if the ring buffer supports it */
7501 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7502 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7503 			return -EINVAL;
7504 #endif
7505 		if (tr->allocated_snapshot)
7506 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7507 					&tr->array_buffer, iter->cpu_file);
7508 
7509 		ret = tracing_arm_snapshot_locked(tr);
7510 		if (ret)
7511 			return ret;
7512 
7513 		/* Now, we're going to swap */
7514 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7515 			local_irq_disable();
7516 			update_max_tr(tr, current, smp_processor_id(), NULL);
7517 			local_irq_enable();
7518 		} else {
7519 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7520 						 (void *)tr, 1);
7521 		}
7522 		tracing_disarm_snapshot(tr);
7523 		break;
7524 	default:
7525 		if (tr->allocated_snapshot) {
7526 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7527 				tracing_reset_online_cpus(&tr->max_buffer);
7528 			else
7529 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7530 		}
7531 		break;
7532 	}
7533 
7534 	if (ret >= 0) {
7535 		*ppos += cnt;
7536 		ret = cnt;
7537 	}
7538 
7539 	return ret;
7540 }
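
/*
 * Usage illustration (not part of the original source, summarizing the
 * switch above): writing to the "snapshot" file controls the max/snapshot
 * buffer, e.g.
 *
 *	echo 0 > snapshot	# free the snapshot buffer
 *	echo 1 > snapshot	# allocate (if needed) and take a snapshot
 *	echo 2 > snapshot	# clear the snapshot contents, keep the buffer
 *
 * For the per-cpu snapshot files, "1" swaps only that CPU's buffer (and is
 * only permitted when CONFIG_RING_BUFFER_ALLOW_SWAP is set), while "0" is
 * rejected with -EINVAL.
 */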
7541 
7542 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7543 {
7544 	struct seq_file *m = file->private_data;
7545 	int ret;
7546 
7547 	ret = tracing_release(inode, file);
7548 
7549 	if (file->f_mode & FMODE_READ)
7550 		return ret;
7551 
7552 	/* If write only, the seq_file is just a stub */
7553 	if (m)
7554 		kfree(m->private);
7555 	kfree(m);
7556 
7557 	return 0;
7558 }
7559 
7560 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7561 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7562 				    size_t count, loff_t *ppos);
7563 static int tracing_buffers_release(struct inode *inode, struct file *file);
7564 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7565 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7566 
7567 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7568 {
7569 	struct ftrace_buffer_info *info;
7570 	int ret;
7571 
7572 	/* The following checks for tracefs lockdown */
7573 	ret = tracing_buffers_open(inode, filp);
7574 	if (ret < 0)
7575 		return ret;
7576 
7577 	info = filp->private_data;
7578 
7579 	if (info->iter.trace->use_max_tr) {
7580 		tracing_buffers_release(inode, filp);
7581 		return -EBUSY;
7582 	}
7583 
7584 	info->iter.snapshot = true;
7585 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7586 
7587 	return ret;
7588 }
7589 
7590 #endif /* CONFIG_TRACER_SNAPSHOT */
7591 
7592 
7593 static const struct file_operations tracing_thresh_fops = {
7594 	.open		= tracing_open_generic,
7595 	.read		= tracing_thresh_read,
7596 	.write		= tracing_thresh_write,
7597 	.llseek		= generic_file_llseek,
7598 };
7599 
7600 #ifdef CONFIG_TRACER_MAX_TRACE
7601 static const struct file_operations tracing_max_lat_fops = {
7602 	.open		= tracing_open_generic_tr,
7603 	.read		= tracing_max_lat_read,
7604 	.write		= tracing_max_lat_write,
7605 	.llseek		= generic_file_llseek,
7606 	.release	= tracing_release_generic_tr,
7607 };
7608 #endif
7609 
7610 static const struct file_operations set_tracer_fops = {
7611 	.open		= tracing_open_generic_tr,
7612 	.read		= tracing_set_trace_read,
7613 	.write		= tracing_set_trace_write,
7614 	.llseek		= generic_file_llseek,
7615 	.release	= tracing_release_generic_tr,
7616 };
7617 
7618 static const struct file_operations tracing_pipe_fops = {
7619 	.open		= tracing_open_pipe,
7620 	.poll		= tracing_poll_pipe,
7621 	.read		= tracing_read_pipe,
7622 	.splice_read	= tracing_splice_read_pipe,
7623 	.release	= tracing_release_pipe,
7624 };
7625 
7626 static const struct file_operations tracing_entries_fops = {
7627 	.open		= tracing_open_generic_tr,
7628 	.read		= tracing_entries_read,
7629 	.write		= tracing_entries_write,
7630 	.llseek		= generic_file_llseek,
7631 	.release	= tracing_release_generic_tr,
7632 };
7633 
7634 static const struct file_operations tracing_buffer_meta_fops = {
7635 	.open		= tracing_buffer_meta_open,
7636 	.read		= seq_read,
7637 	.llseek		= seq_lseek,
7638 	.release	= tracing_seq_release,
7639 };
7640 
7641 static const struct file_operations tracing_total_entries_fops = {
7642 	.open		= tracing_open_generic_tr,
7643 	.read		= tracing_total_entries_read,
7644 	.llseek		= generic_file_llseek,
7645 	.release	= tracing_release_generic_tr,
7646 };
7647 
7648 static const struct file_operations tracing_free_buffer_fops = {
7649 	.open		= tracing_open_generic_tr,
7650 	.write		= tracing_free_buffer_write,
7651 	.release	= tracing_free_buffer_release,
7652 };
7653 
7654 static const struct file_operations tracing_mark_fops = {
7655 	.open		= tracing_mark_open,
7656 	.write		= tracing_mark_write,
7657 	.release	= tracing_release_generic_tr,
7658 };
7659 
7660 static const struct file_operations tracing_mark_raw_fops = {
7661 	.open		= tracing_mark_open,
7662 	.write		= tracing_mark_raw_write,
7663 	.release	= tracing_release_generic_tr,
7664 };
7665 
7666 static const struct file_operations trace_clock_fops = {
7667 	.open		= tracing_clock_open,
7668 	.read		= seq_read,
7669 	.llseek		= seq_lseek,
7670 	.release	= tracing_single_release_tr,
7671 	.write		= tracing_clock_write,
7672 };
7673 
7674 static const struct file_operations trace_time_stamp_mode_fops = {
7675 	.open		= tracing_time_stamp_mode_open,
7676 	.read		= seq_read,
7677 	.llseek		= seq_lseek,
7678 	.release	= tracing_single_release_tr,
7679 };
7680 
7681 static const struct file_operations last_boot_fops = {
7682 	.open		= tracing_last_boot_open,
7683 	.read		= seq_read,
7684 	.llseek		= seq_lseek,
7685 	.release	= tracing_seq_release,
7686 };
7687 
7688 #ifdef CONFIG_TRACER_SNAPSHOT
7689 static const struct file_operations snapshot_fops = {
7690 	.open		= tracing_snapshot_open,
7691 	.read		= seq_read,
7692 	.write		= tracing_snapshot_write,
7693 	.llseek		= tracing_lseek,
7694 	.release	= tracing_snapshot_release,
7695 };
7696 
7697 static const struct file_operations snapshot_raw_fops = {
7698 	.open		= snapshot_raw_open,
7699 	.read		= tracing_buffers_read,
7700 	.release	= tracing_buffers_release,
7701 	.splice_read	= tracing_buffers_splice_read,
7702 };
7703 
7704 #endif /* CONFIG_TRACER_SNAPSHOT */
7705 
7706 /*
7707  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7708  * @filp: The active open file structure
7709  * @ubuf: The userspace provided buffer holding the value to be written
7710  * @cnt: The number of bytes to read from @ubuf
7711  * @ppos: The current "file" position
7712  *
7713  * This function implements the write interface for a struct trace_min_max_param.
7714  * The filp->private_data must point to a trace_min_max_param structure that
7715  * defines where to write the value, the min and the max acceptable values,
7716  * and a lock to protect the write.
7717  */
7718 static ssize_t
7719 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7720 {
7721 	struct trace_min_max_param *param = filp->private_data;
7722 	u64 val;
7723 	int err;
7724 
7725 	if (!param)
7726 		return -EFAULT;
7727 
7728 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7729 	if (err)
7730 		return err;
7731 
7732 	if (param->lock)
7733 		mutex_lock(param->lock);
7734 
7735 	if (param->min && val < *param->min)
7736 		err = -EINVAL;
7737 
7738 	if (param->max && val > *param->max)
7739 		err = -EINVAL;
7740 
7741 	if (!err)
7742 		*param->val = val;
7743 
7744 	if (param->lock)
7745 		mutex_unlock(param->lock);
7746 
7747 	if (err)
7748 		return err;
7749 
7750 	return cnt;
7751 }
7752 
7753 /*
7754  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7755  * @filp: The active open file structure
7756  * @ubuf: The userspace provided buffer to read value into
7757  * @cnt: The maximum number of bytes to read
7758  * @ppos: The current "file" position
7759  *
7760  * This function implements the read interface for a struct trace_min_max_param.
7761  * The filp->private_data must point to a trace_min_max_param struct with valid
7762  * data.
7763  */
7764 static ssize_t
7765 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7766 {
7767 	struct trace_min_max_param *param = filp->private_data;
7768 	char buf[U64_STR_SIZE];
7769 	int len;
7770 	u64 val;
7771 
7772 	if (!param)
7773 		return -EFAULT;
7774 
7775 	val = *param->val;
7776 
7777 	if (cnt > sizeof(buf))
7778 		cnt = sizeof(buf);
7779 
7780 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7781 
7782 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7783 }
7784 
7785 const struct file_operations trace_min_max_fops = {
7786 	.open		= tracing_open_generic,
7787 	.read		= trace_min_max_read,
7788 	.write		= trace_min_max_write,
7789 };
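
/*
 * Illustrative sketch (not part of the original source): how a caller might
 * wire a u64 tunable to trace_min_max_fops.  The names my_lock, my_val,
 * my_min, my_max, "my_tunable" and the parent dentry are hypothetical; only
 * the trace_min_max_param fields used above (lock, val, min, max) and the
 * trace_create_file() signature from this file are relied upon.
 *
 *	static DEFINE_MUTEX(my_lock);
 *	static u64 my_val = 50, my_min = 1, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_tunable", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 *
 * Reading the file then returns "50\n", and writes outside [1, 100] fail
 * with -EINVAL, as implemented by the two handlers above.
 */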
7790 
7791 #define TRACING_LOG_ERRS_MAX	8
7792 #define TRACING_LOG_LOC_MAX	128
7793 
7794 #define CMD_PREFIX "  Command: "
7795 
7796 struct err_info {
7797 	const char	**errs;	/* ptr to loc-specific array of err strings */
7798 	u8		type;	/* index into errs -> specific err string */
7799 	u16		pos;	/* caret position */
7800 	u64		ts;
7801 };
7802 
7803 struct tracing_log_err {
7804 	struct list_head	list;
7805 	struct err_info		info;
7806 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7807 	char			*cmd;                     /* what caused err */
7808 };
7809 
7810 static DEFINE_MUTEX(tracing_err_log_lock);
7811 
7812 static struct tracing_log_err *alloc_tracing_log_err(int len)
7813 {
7814 	struct tracing_log_err *err;
7815 
7816 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7817 	if (!err)
7818 		return ERR_PTR(-ENOMEM);
7819 
7820 	err->cmd = kzalloc(len, GFP_KERNEL);
7821 	if (!err->cmd) {
7822 		kfree(err);
7823 		return ERR_PTR(-ENOMEM);
7824 	}
7825 
7826 	return err;
7827 }
7828 
7829 static void free_tracing_log_err(struct tracing_log_err *err)
7830 {
7831 	kfree(err->cmd);
7832 	kfree(err);
7833 }
7834 
7835 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7836 						   int len)
7837 {
7838 	struct tracing_log_err *err;
7839 	char *cmd;
7840 
7841 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7842 		err = alloc_tracing_log_err(len);
7843 		if (PTR_ERR(err) != -ENOMEM)
7844 			tr->n_err_log_entries++;
7845 
7846 		return err;
7847 	}
7848 	cmd = kzalloc(len, GFP_KERNEL);
7849 	if (!cmd)
7850 		return ERR_PTR(-ENOMEM);
7851 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7852 	kfree(err->cmd);
7853 	err->cmd = cmd;
7854 	list_del(&err->list);
7855 
7856 	return err;
7857 }
7858 
7859 /**
7860  * err_pos - find the position of a string within a command for error careting
7861  * @cmd: The tracing command that caused the error
7862  * @str: The string to position the caret at within @cmd
7863  *
7864  * Finds the position of the first occurrence of @str within @cmd.  The
7865  * return value can be passed to tracing_log_err() for caret placement
7866  * within @cmd.
7867  *
7868  * Returns the index within @cmd of the first occurrence of @str or 0
7869  * if @str was not found.
7870  */
7871 unsigned int err_pos(char *cmd, const char *str)
7872 {
7873 	char *found;
7874 
7875 	if (WARN_ON(!strlen(cmd)))
7876 		return 0;
7877 
7878 	found = strstr(cmd, str);
7879 	if (found)
7880 		return found - cmd;
7881 
7882 	return 0;
7883 }
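
/*
 * For illustration (hypothetical command string): with
 * cmd = "hist:keys=pid:bad=1", err_pos(cmd, "bad=1") returns 14, which can
 * then be passed as @pos to tracing_log_err() below so that the caret lines
 * up under "bad=1" in the logged command.
 */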
7884 
7885 /**
7886  * tracing_log_err - write an error to the tracing error log
7887  * @tr: The associated trace array for the error (NULL for top level array)
7888  * @loc: A string describing where the error occurred
7889  * @cmd: The tracing command that caused the error
7890  * @errs: The array of loc-specific static error strings
7891  * @type: The index into errs[], which produces the specific static err string
7892  * @pos: The position the caret should be placed in the cmd
7893  *
7894  * Writes an error into tracing/error_log of the form:
7895  *
7896  * <loc>: error: <text>
7897  *   Command: <cmd>
7898  *              ^
7899  *
7900  * tracing/error_log is a small log file containing the last
7901  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7902  * unless there has been a tracing error, and the error log can be
7903  * cleared and have its memory freed by writing the empty string in
7904  * truncation mode to it i.e. echo > tracing/error_log.
7905  *
7906  * NOTE: the @errs array along with the @type param are used to
7907  * produce a static error string - this string is not copied and saved
7908  * when the error is logged - only a pointer to it is saved.  See
7909  * existing callers for examples of how static strings are typically
7910  * defined for use with tracing_log_err().
7911  */
7912 void tracing_log_err(struct trace_array *tr,
7913 		     const char *loc, const char *cmd,
7914 		     const char **errs, u8 type, u16 pos)
7915 {
7916 	struct tracing_log_err *err;
7917 	int len = 0;
7918 
7919 	if (!tr)
7920 		tr = &global_trace;
7921 
7922 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7923 
7924 	guard(mutex)(&tracing_err_log_lock);
7925 
7926 	err = get_tracing_log_err(tr, len);
7927 	if (PTR_ERR(err) == -ENOMEM)
7928 		return;
7929 
7930 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7931 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7932 
7933 	err->info.errs = errs;
7934 	err->info.type = type;
7935 	err->info.pos = pos;
7936 	err->info.ts = local_clock();
7937 
7938 	list_add_tail(&err->list, &tr->err_log);
7939 }
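
/*
 * Illustrative sketch (not part of the original source): a typical caller
 * keeps a static array of error strings indexed by @type and uses err_pos()
 * to place the caret.  The names hist_errs, HIST_ERR_BAD_KEY and the
 * "hist:keys=bogus" command are hypothetical.
 *
 *	static const char *hist_errs[] = { "Bad key", ... };
 *	char *cmd = "hist:keys=bogus";
 *
 *	tracing_log_err(tr, "hist", cmd, hist_errs,
 *			HIST_ERR_BAD_KEY, err_pos(cmd, "bogus"));
 *
 * which logs "hist: error: Bad key" followed by the command, with the caret
 * placed under "bogus", in the format shown above.
 */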
7940 
7941 static void clear_tracing_err_log(struct trace_array *tr)
7942 {
7943 	struct tracing_log_err *err, *next;
7944 
7945 	mutex_lock(&tracing_err_log_lock);
7946 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7947 		list_del(&err->list);
7948 		free_tracing_log_err(err);
7949 	}
7950 
7951 	tr->n_err_log_entries = 0;
7952 	mutex_unlock(&tracing_err_log_lock);
7953 }
7954 
7955 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7956 {
7957 	struct trace_array *tr = m->private;
7958 
7959 	mutex_lock(&tracing_err_log_lock);
7960 
7961 	return seq_list_start(&tr->err_log, *pos);
7962 }
7963 
7964 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7965 {
7966 	struct trace_array *tr = m->private;
7967 
7968 	return seq_list_next(v, &tr->err_log, pos);
7969 }
7970 
7971 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7972 {
7973 	mutex_unlock(&tracing_err_log_lock);
7974 }
7975 
7976 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7977 {
7978 	u16 i;
7979 
7980 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7981 		seq_putc(m, ' ');
7982 	for (i = 0; i < pos; i++)
7983 		seq_putc(m, ' ');
7984 	seq_puts(m, "^\n");
7985 }
7986 
7987 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7988 {
7989 	struct tracing_log_err *err = v;
7990 
7991 	if (err) {
7992 		const char *err_text = err->info.errs[err->info.type];
7993 		u64 sec = err->info.ts;
7994 		u32 nsec;
7995 
7996 		nsec = do_div(sec, NSEC_PER_SEC);
7997 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7998 			   err->loc, err_text);
7999 		seq_printf(m, "%s", err->cmd);
8000 		tracing_err_log_show_pos(m, err->info.pos);
8001 	}
8002 
8003 	return 0;
8004 }
8005 
8006 static const struct seq_operations tracing_err_log_seq_ops = {
8007 	.start  = tracing_err_log_seq_start,
8008 	.next   = tracing_err_log_seq_next,
8009 	.stop   = tracing_err_log_seq_stop,
8010 	.show   = tracing_err_log_seq_show
8011 };
8012 
8013 static int tracing_err_log_open(struct inode *inode, struct file *file)
8014 {
8015 	struct trace_array *tr = inode->i_private;
8016 	int ret = 0;
8017 
8018 	ret = tracing_check_open_get_tr(tr);
8019 	if (ret)
8020 		return ret;
8021 
8022 	/* If this file was opened for write, then erase contents */
8023 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8024 		clear_tracing_err_log(tr);
8025 
8026 	if (file->f_mode & FMODE_READ) {
8027 		ret = seq_open(file, &tracing_err_log_seq_ops);
8028 		if (!ret) {
8029 			struct seq_file *m = file->private_data;
8030 			m->private = tr;
8031 		} else {
8032 			trace_array_put(tr);
8033 		}
8034 	}
8035 	return ret;
8036 }
8037 
8038 static ssize_t tracing_err_log_write(struct file *file,
8039 				     const char __user *buffer,
8040 				     size_t count, loff_t *ppos)
8041 {
8042 	return count;
8043 }
8044 
8045 static int tracing_err_log_release(struct inode *inode, struct file *file)
8046 {
8047 	struct trace_array *tr = inode->i_private;
8048 
8049 	trace_array_put(tr);
8050 
8051 	if (file->f_mode & FMODE_READ)
8052 		seq_release(inode, file);
8053 
8054 	return 0;
8055 }
8056 
8057 static const struct file_operations tracing_err_log_fops = {
8058 	.open           = tracing_err_log_open,
8059 	.write		= tracing_err_log_write,
8060 	.read           = seq_read,
8061 	.llseek         = tracing_lseek,
8062 	.release        = tracing_err_log_release,
8063 };
8064 
8065 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8066 {
8067 	struct trace_array *tr = inode->i_private;
8068 	struct ftrace_buffer_info *info;
8069 	int ret;
8070 
8071 	ret = tracing_check_open_get_tr(tr);
8072 	if (ret)
8073 		return ret;
8074 
8075 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8076 	if (!info) {
8077 		trace_array_put(tr);
8078 		return -ENOMEM;
8079 	}
8080 
8081 	mutex_lock(&trace_types_lock);
8082 
8083 	info->iter.tr		= tr;
8084 	info->iter.cpu_file	= tracing_get_cpu(inode);
8085 	info->iter.trace	= tr->current_trace;
8086 	info->iter.array_buffer = &tr->array_buffer;
8087 	info->spare		= NULL;
8088 	/* Force reading ring buffer for first read */
8089 	info->read		= (unsigned int)-1;
8090 
8091 	filp->private_data = info;
8092 
8093 	tr->trace_ref++;
8094 
8095 	mutex_unlock(&trace_types_lock);
8096 
8097 	ret = nonseekable_open(inode, filp);
8098 	if (ret < 0)
8099 		trace_array_put(tr);
8100 
8101 	return ret;
8102 }
8103 
8104 static __poll_t
8105 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8106 {
8107 	struct ftrace_buffer_info *info = filp->private_data;
8108 	struct trace_iterator *iter = &info->iter;
8109 
8110 	return trace_poll(iter, filp, poll_table);
8111 }
8112 
8113 static ssize_t
8114 tracing_buffers_read(struct file *filp, char __user *ubuf,
8115 		     size_t count, loff_t *ppos)
8116 {
8117 	struct ftrace_buffer_info *info = filp->private_data;
8118 	struct trace_iterator *iter = &info->iter;
8119 	void *trace_data;
8120 	int page_size;
8121 	ssize_t ret = 0;
8122 	ssize_t size;
8123 
8124 	if (!count)
8125 		return 0;
8126 
8127 #ifdef CONFIG_TRACER_MAX_TRACE
8128 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8129 		return -EBUSY;
8130 #endif
8131 
8132 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8133 
8134 	/* Make sure the spare matches the current sub buffer size */
8135 	if (info->spare) {
8136 		if (page_size != info->spare_size) {
8137 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8138 						   info->spare_cpu, info->spare);
8139 			info->spare = NULL;
8140 		}
8141 	}
8142 
8143 	if (!info->spare) {
8144 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8145 							  iter->cpu_file);
8146 		if (IS_ERR(info->spare)) {
8147 			ret = PTR_ERR(info->spare);
8148 			info->spare = NULL;
8149 		} else {
8150 			info->spare_cpu = iter->cpu_file;
8151 			info->spare_size = page_size;
8152 		}
8153 	}
8154 	if (!info->spare)
8155 		return ret;
8156 
8157 	/* Do we have previous read data to read? */
8158 	if (info->read < page_size)
8159 		goto read;
8160 
8161  again:
8162 	trace_access_lock(iter->cpu_file);
8163 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8164 				    info->spare,
8165 				    count,
8166 				    iter->cpu_file, 0);
8167 	trace_access_unlock(iter->cpu_file);
8168 
8169 	if (ret < 0) {
8170 		if (trace_empty(iter) && !iter->closed) {
8171 			if ((filp->f_flags & O_NONBLOCK))
8172 				return -EAGAIN;
8173 
8174 			ret = wait_on_pipe(iter, 0);
8175 			if (ret)
8176 				return ret;
8177 
8178 			goto again;
8179 		}
8180 		return 0;
8181 	}
8182 
8183 	info->read = 0;
8184  read:
8185 	size = page_size - info->read;
8186 	if (size > count)
8187 		size = count;
8188 	trace_data = ring_buffer_read_page_data(info->spare);
8189 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8190 	if (ret == size)
8191 		return -EFAULT;
8192 
8193 	size -= ret;
8194 
8195 	*ppos += size;
8196 	info->read += size;
8197 
8198 	return size;
8199 }
8200 
8201 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8202 {
8203 	struct ftrace_buffer_info *info = file->private_data;
8204 	struct trace_iterator *iter = &info->iter;
8205 
8206 	iter->closed = true;
8207 	/* Make sure the waiters see the new wait_index */
8208 	(void)atomic_fetch_inc_release(&iter->wait_index);
8209 
8210 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8211 
8212 	return 0;
8213 }
8214 
8215 static int tracing_buffers_release(struct inode *inode, struct file *file)
8216 {
8217 	struct ftrace_buffer_info *info = file->private_data;
8218 	struct trace_iterator *iter = &info->iter;
8219 
8220 	mutex_lock(&trace_types_lock);
8221 
8222 	iter->tr->trace_ref--;
8223 
8224 	__trace_array_put(iter->tr);
8225 
8226 	if (info->spare)
8227 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8228 					   info->spare_cpu, info->spare);
8229 	kvfree(info);
8230 
8231 	mutex_unlock(&trace_types_lock);
8232 
8233 	return 0;
8234 }
8235 
8236 struct buffer_ref {
8237 	struct trace_buffer	*buffer;
8238 	void			*page;
8239 	int			cpu;
8240 	refcount_t		refcount;
8241 };
8242 
8243 static void buffer_ref_release(struct buffer_ref *ref)
8244 {
8245 	if (!refcount_dec_and_test(&ref->refcount))
8246 		return;
8247 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8248 	kfree(ref);
8249 }
8250 
8251 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8252 				    struct pipe_buffer *buf)
8253 {
8254 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8255 
8256 	buffer_ref_release(ref);
8257 	buf->private = 0;
8258 }
8259 
8260 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8261 				struct pipe_buffer *buf)
8262 {
8263 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8264 
8265 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8266 		return false;
8267 
8268 	refcount_inc(&ref->refcount);
8269 	return true;
8270 }
8271 
8272 /* Pipe buffer operations for a buffer. */
8273 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8274 	.release		= buffer_pipe_buf_release,
8275 	.get			= buffer_pipe_buf_get,
8276 };
8277 
8278 /*
8279  * Callback from splice_to_pipe(), if we need to release some pages
8280  * at the end of the spd in case we errored out while filling the pipe.
8281  */
8282 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8283 {
8284 	struct buffer_ref *ref =
8285 		(struct buffer_ref *)spd->partial[i].private;
8286 
8287 	buffer_ref_release(ref);
8288 	spd->partial[i].private = 0;
8289 }
8290 
8291 static ssize_t
8292 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8293 			    struct pipe_inode_info *pipe, size_t len,
8294 			    unsigned int flags)
8295 {
8296 	struct ftrace_buffer_info *info = file->private_data;
8297 	struct trace_iterator *iter = &info->iter;
8298 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8299 	struct page *pages_def[PIPE_DEF_BUFFERS];
8300 	struct splice_pipe_desc spd = {
8301 		.pages		= pages_def,
8302 		.partial	= partial_def,
8303 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8304 		.ops		= &buffer_pipe_buf_ops,
8305 		.spd_release	= buffer_spd_release,
8306 	};
8307 	struct buffer_ref *ref;
8308 	bool woken = false;
8309 	int page_size;
8310 	int entries, i;
8311 	ssize_t ret = 0;
8312 
8313 #ifdef CONFIG_TRACER_MAX_TRACE
8314 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8315 		return -EBUSY;
8316 #endif
8317 
8318 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8319 	if (*ppos & (page_size - 1))
8320 		return -EINVAL;
8321 
8322 	if (len & (page_size - 1)) {
8323 		if (len < page_size)
8324 			return -EINVAL;
8325 		len &= (~(page_size - 1));
8326 	}
8327 
8328 	if (splice_grow_spd(pipe, &spd))
8329 		return -ENOMEM;
8330 
8331  again:
8332 	trace_access_lock(iter->cpu_file);
8333 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8334 
8335 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8336 		struct page *page;
8337 		int r;
8338 
8339 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8340 		if (!ref) {
8341 			ret = -ENOMEM;
8342 			break;
8343 		}
8344 
8345 		refcount_set(&ref->refcount, 1);
8346 		ref->buffer = iter->array_buffer->buffer;
8347 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8348 		if (IS_ERR(ref->page)) {
8349 			ret = PTR_ERR(ref->page);
8350 			ref->page = NULL;
8351 			kfree(ref);
8352 			break;
8353 		}
8354 		ref->cpu = iter->cpu_file;
8355 
8356 		r = ring_buffer_read_page(ref->buffer, ref->page,
8357 					  len, iter->cpu_file, 1);
8358 		if (r < 0) {
8359 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8360 						   ref->page);
8361 			kfree(ref);
8362 			break;
8363 		}
8364 
8365 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8366 
8367 		spd.pages[i] = page;
8368 		spd.partial[i].len = page_size;
8369 		spd.partial[i].offset = 0;
8370 		spd.partial[i].private = (unsigned long)ref;
8371 		spd.nr_pages++;
8372 		*ppos += page_size;
8373 
8374 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8375 	}
8376 
8377 	trace_access_unlock(iter->cpu_file);
8378 	spd.nr_pages = i;
8379 
8380 	/* did we read anything? */
8381 	if (!spd.nr_pages) {
8382 
8383 		if (ret)
8384 			goto out;
8385 
8386 		if (woken)
8387 			goto out;
8388 
8389 		ret = -EAGAIN;
8390 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8391 			goto out;
8392 
8393 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8394 		if (ret)
8395 			goto out;
8396 
8397 		/* No need to wait after waking up when tracing is off */
8398 		if (!tracer_tracing_is_on(iter->tr))
8399 			goto out;
8400 
8401 		/* Iterate one more time to collect any new data then exit */
8402 		woken = true;
8403 
8404 		goto again;
8405 	}
8406 
8407 	ret = splice_to_pipe(pipe, &spd);
8408 out:
8409 	splice_shrink_spd(&spd);
8410 
8411 	return ret;
8412 }
8413 
8414 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8415 {
8416 	struct ftrace_buffer_info *info = file->private_data;
8417 	struct trace_iterator *iter = &info->iter;
8418 	int err;
8419 
8420 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8421 		if (!(file->f_flags & O_NONBLOCK)) {
8422 			err = ring_buffer_wait(iter->array_buffer->buffer,
8423 					       iter->cpu_file,
8424 					       iter->tr->buffer_percent,
8425 					       NULL, NULL);
8426 			if (err)
8427 				return err;
8428 		}
8429 
8430 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8431 						  iter->cpu_file);
8432 	} else if (cmd) {
8433 		return -ENOTTY;
8434 	}
8435 
8436 	/*
8437 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8438 	 * waiters.
8439 	 */
8440 	mutex_lock(&trace_types_lock);
8441 
8442 	/* Make sure the waiters see the new wait_index */
8443 	(void)atomic_fetch_inc_release(&iter->wait_index);
8444 
8445 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8446 
8447 	mutex_unlock(&trace_types_lock);
8448 	return 0;
8449 }
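
/*
 * Illustrative user-space sketch (not part of the original source), assuming
 * the TRACE_MMAP_IOCTL_GET_READER definition is visible to user space (e.g.
 * via the trace_mmap uapi header) and paths are relative to the tracefs
 * instance directory:
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *
 *	Advance the mapped reader page; without O_NONBLOCK this blocks
 *	until buffer_percent of the buffer is filled:
 *
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 *
 *	An ioctl with cmd == 0 simply wakes up anyone waiting on this
 *	per-CPU buffer:
 *
 *	ioctl(fd, 0);
 */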
8450 
8451 #ifdef CONFIG_TRACER_MAX_TRACE
8452 static int get_snapshot_map(struct trace_array *tr)
8453 {
8454 	int err = 0;
8455 
8456 	/*
8457 	 * Called with mmap_lock held. Lockdep would be unhappy if we were to
8458 	 * take trace_types_lock here, so use the dedicated
8459 	 * snapshot_trigger_lock instead.
8460 	 */
8461 	spin_lock(&tr->snapshot_trigger_lock);
8462 
8463 	if (tr->snapshot || tr->mapped == UINT_MAX)
8464 		err = -EBUSY;
8465 	else
8466 		tr->mapped++;
8467 
8468 	spin_unlock(&tr->snapshot_trigger_lock);
8469 
8470 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8471 	if (tr->mapped == 1)
8472 		synchronize_rcu();
8473 
8474 	return err;
8475 
8476 }
8477 static void put_snapshot_map(struct trace_array *tr)
8478 {
8479 	spin_lock(&tr->snapshot_trigger_lock);
8480 	if (!WARN_ON(!tr->mapped))
8481 		tr->mapped--;
8482 	spin_unlock(&tr->snapshot_trigger_lock);
8483 }
8484 #else
8485 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8486 static inline void put_snapshot_map(struct trace_array *tr) { }
8487 #endif
8488 
8489 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8490 {
8491 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8492 	struct trace_iterator *iter = &info->iter;
8493 
8494 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8495 	put_snapshot_map(iter->tr);
8496 }
8497 
8498 static const struct vm_operations_struct tracing_buffers_vmops = {
8499 	.close		= tracing_buffers_mmap_close,
8500 };
8501 
8502 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8503 {
8504 	struct ftrace_buffer_info *info = filp->private_data;
8505 	struct trace_iterator *iter = &info->iter;
8506 	int ret = 0;
8507 
8508 	/* Currently the boot mapped buffer is not supported for mmap */
8509 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8510 		return -ENODEV;
8511 
8512 	ret = get_snapshot_map(iter->tr);
8513 	if (ret)
8514 		return ret;
8515 
8516 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8517 	if (ret)
8518 		put_snapshot_map(iter->tr);
8519 
8520 	vma->vm_ops = &tracing_buffers_vmops;
8521 
8522 	return ret;
8523 }
8524 
8525 static const struct file_operations tracing_buffers_fops = {
8526 	.open		= tracing_buffers_open,
8527 	.read		= tracing_buffers_read,
8528 	.poll		= tracing_buffers_poll,
8529 	.release	= tracing_buffers_release,
8530 	.flush		= tracing_buffers_flush,
8531 	.splice_read	= tracing_buffers_splice_read,
8532 	.unlocked_ioctl = tracing_buffers_ioctl,
8533 	.mmap		= tracing_buffers_mmap,
8534 };
8535 
8536 static ssize_t
8537 tracing_stats_read(struct file *filp, char __user *ubuf,
8538 		   size_t count, loff_t *ppos)
8539 {
8540 	struct inode *inode = file_inode(filp);
8541 	struct trace_array *tr = inode->i_private;
8542 	struct array_buffer *trace_buf = &tr->array_buffer;
8543 	int cpu = tracing_get_cpu(inode);
8544 	struct trace_seq *s;
8545 	unsigned long cnt;
8546 	unsigned long long t;
8547 	unsigned long usec_rem;
8548 
8549 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8550 	if (!s)
8551 		return -ENOMEM;
8552 
8553 	trace_seq_init(s);
8554 
8555 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8556 	trace_seq_printf(s, "entries: %ld\n", cnt);
8557 
8558 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8559 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8560 
8561 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8562 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8563 
8564 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8565 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8566 
8567 	if (trace_clocks[tr->clock_id].in_ns) {
8568 		/* local or global for trace_clock */
8569 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8570 		usec_rem = do_div(t, USEC_PER_SEC);
8571 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8572 								t, usec_rem);
8573 
8574 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8575 		usec_rem = do_div(t, USEC_PER_SEC);
8576 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8577 	} else {
8578 		/* counter or tsc mode for trace_clock */
8579 		trace_seq_printf(s, "oldest event ts: %llu\n",
8580 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8581 
8582 		trace_seq_printf(s, "now ts: %llu\n",
8583 				ring_buffer_time_stamp(trace_buf->buffer));
8584 	}
8585 
8586 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8587 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8588 
8589 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8590 	trace_seq_printf(s, "read events: %ld\n", cnt);
8591 
8592 	count = simple_read_from_buffer(ubuf, count, ppos,
8593 					s->buffer, trace_seq_used(s));
8594 
8595 	kfree(s);
8596 
8597 	return count;
8598 }
8599 
8600 static const struct file_operations tracing_stats_fops = {
8601 	.open		= tracing_open_generic_tr,
8602 	.read		= tracing_stats_read,
8603 	.llseek		= generic_file_llseek,
8604 	.release	= tracing_release_generic_tr,
8605 };
8606 
8607 #ifdef CONFIG_DYNAMIC_FTRACE
8608 
8609 static ssize_t
8610 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8611 		  size_t cnt, loff_t *ppos)
8612 {
8613 	ssize_t ret;
8614 	char *buf;
8615 	int r;
8616 
8617 	/* 512 should be plenty to hold the amount needed */
8618 #define DYN_INFO_BUF_SIZE	512
8619 
8620 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8621 	if (!buf)
8622 		return -ENOMEM;
8623 
8624 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8625 		      "%ld pages:%ld groups: %ld\n"
8626 		      "ftrace boot update time = %llu (ns)\n"
8627 		      "ftrace module total update time = %llu (ns)\n",
8628 		      ftrace_update_tot_cnt,
8629 		      ftrace_number_of_pages,
8630 		      ftrace_number_of_groups,
8631 		      ftrace_update_time,
8632 		      ftrace_total_mod_time);
8633 
8634 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8635 	kfree(buf);
8636 	return ret;
8637 }
8638 
8639 static const struct file_operations tracing_dyn_info_fops = {
8640 	.open		= tracing_open_generic,
8641 	.read		= tracing_read_dyn_info,
8642 	.llseek		= generic_file_llseek,
8643 };
8644 #endif /* CONFIG_DYNAMIC_FTRACE */
8645 
8646 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8647 static void
8648 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8649 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8650 		void *data)
8651 {
8652 	tracing_snapshot_instance(tr);
8653 }
8654 
8655 static void
8656 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8657 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8658 		      void *data)
8659 {
8660 	struct ftrace_func_mapper *mapper = data;
8661 	long *count = NULL;
8662 
8663 	if (mapper)
8664 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8665 
8666 	if (count) {
8667 
8668 		if (*count <= 0)
8669 			return;
8670 
8671 		(*count)--;
8672 	}
8673 
8674 	tracing_snapshot_instance(tr);
8675 }
8676 
8677 static int
8678 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8679 		      struct ftrace_probe_ops *ops, void *data)
8680 {
8681 	struct ftrace_func_mapper *mapper = data;
8682 	long *count = NULL;
8683 
8684 	seq_printf(m, "%ps:", (void *)ip);
8685 
8686 	seq_puts(m, "snapshot");
8687 
8688 	if (mapper)
8689 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8690 
8691 	if (count)
8692 		seq_printf(m, ":count=%ld\n", *count);
8693 	else
8694 		seq_puts(m, ":unlimited\n");
8695 
8696 	return 0;
8697 }
8698 
8699 static int
8700 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8701 		     unsigned long ip, void *init_data, void **data)
8702 {
8703 	struct ftrace_func_mapper *mapper = *data;
8704 
8705 	if (!mapper) {
8706 		mapper = allocate_ftrace_func_mapper();
8707 		if (!mapper)
8708 			return -ENOMEM;
8709 		*data = mapper;
8710 	}
8711 
8712 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8713 }
8714 
8715 static void
8716 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8717 		     unsigned long ip, void *data)
8718 {
8719 	struct ftrace_func_mapper *mapper = data;
8720 
8721 	if (!ip) {
8722 		if (!mapper)
8723 			return;
8724 		free_ftrace_func_mapper(mapper, NULL);
8725 		return;
8726 	}
8727 
8728 	ftrace_func_mapper_remove_ip(mapper, ip);
8729 }
8730 
8731 static struct ftrace_probe_ops snapshot_probe_ops = {
8732 	.func			= ftrace_snapshot,
8733 	.print			= ftrace_snapshot_print,
8734 };
8735 
8736 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8737 	.func			= ftrace_count_snapshot,
8738 	.print			= ftrace_snapshot_print,
8739 	.init			= ftrace_snapshot_init,
8740 	.free			= ftrace_snapshot_free,
8741 };
8742 
8743 static int
8744 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8745 			       char *glob, char *cmd, char *param, int enable)
8746 {
8747 	struct ftrace_probe_ops *ops;
8748 	void *count = (void *)-1;
8749 	char *number;
8750 	int ret;
8751 
8752 	if (!tr)
8753 		return -ENODEV;
8754 
8755 	/* hash funcs only work with set_ftrace_filter */
8756 	if (!enable)
8757 		return -EINVAL;
8758 
8759 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8760 
8761 	if (glob[0] == '!') {
8762 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8763 		if (!ret)
8764 			tracing_disarm_snapshot(tr);
8765 
8766 		return ret;
8767 	}
8768 
8769 	if (!param)
8770 		goto out_reg;
8771 
8772 	number = strsep(&param, ":");
8773 
8774 	if (!strlen(number))
8775 		goto out_reg;
8776 
8777 	/*
8778 	 * We use the callback data field (which is a pointer)
8779 	 * as our counter.
8780 	 */
8781 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8782 	if (ret)
8783 		return ret;
8784 
8785  out_reg:
8786 	ret = tracing_arm_snapshot(tr);
8787 	if (ret < 0)
8788 		goto out;
8789 
8790 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8791 	if (ret < 0)
8792 		tracing_disarm_snapshot(tr);
8793  out:
8794 	return ret < 0 ? ret : 0;
8795 }
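
/*
 * Usage illustration (matching the parsing above): the "snapshot" command is
 * registered on set_ftrace_filter, e.g.
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:3' > set_ftrace_filter
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The first takes a snapshot every time schedule() is hit, the second limits
 * that to three snapshots (the ":3" count parsed from @param), and the '!'
 * form unregisters the probe and disarms the snapshot.
 */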
8796 
8797 static struct ftrace_func_command ftrace_snapshot_cmd = {
8798 	.name			= "snapshot",
8799 	.func			= ftrace_trace_snapshot_callback,
8800 };
8801 
8802 static __init int register_snapshot_cmd(void)
8803 {
8804 	return register_ftrace_command(&ftrace_snapshot_cmd);
8805 }
8806 #else
8807 static inline __init int register_snapshot_cmd(void) { return 0; }
8808 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8809 
8810 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8811 {
8812 	if (WARN_ON(!tr->dir))
8813 		return ERR_PTR(-ENODEV);
8814 
8815 	/* Top directory uses NULL as the parent */
8816 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8817 		return NULL;
8818 
8819 	/* All sub buffers have a descriptor */
8820 	return tr->dir;
8821 }
8822 
8823 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8824 {
8825 	struct dentry *d_tracer;
8826 
8827 	if (tr->percpu_dir)
8828 		return tr->percpu_dir;
8829 
8830 	d_tracer = tracing_get_dentry(tr);
8831 	if (IS_ERR(d_tracer))
8832 		return NULL;
8833 
8834 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8835 
8836 	MEM_FAIL(!tr->percpu_dir,
8837 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8838 
8839 	return tr->percpu_dir;
8840 }
8841 
8842 static struct dentry *
8843 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8844 		      void *data, long cpu, const struct file_operations *fops)
8845 {
8846 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8847 
8848 	if (ret) /* See tracing_get_cpu() */
8849 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8850 	return ret;
8851 }
8852 
8853 static void
8854 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8855 {
8856 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8857 	struct dentry *d_cpu;
8858 	char cpu_dir[30]; /* 30 characters should be more than enough */
8859 
8860 	if (!d_percpu)
8861 		return;
8862 
8863 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8864 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8865 	if (!d_cpu) {
8866 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8867 		return;
8868 	}
8869 
8870 	/* per cpu trace_pipe */
8871 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8872 				tr, cpu, &tracing_pipe_fops);
8873 
8874 	/* per cpu trace */
8875 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8876 				tr, cpu, &tracing_fops);
8877 
8878 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8879 				tr, cpu, &tracing_buffers_fops);
8880 
8881 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8882 				tr, cpu, &tracing_stats_fops);
8883 
8884 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8885 				tr, cpu, &tracing_entries_fops);
8886 
8887 	if (tr->range_addr_start)
8888 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8889 				      tr, cpu, &tracing_buffer_meta_fops);
8890 #ifdef CONFIG_TRACER_SNAPSHOT
8891 	if (!tr->range_addr_start) {
8892 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8893 				      tr, cpu, &snapshot_fops);
8894 
8895 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8896 				      tr, cpu, &snapshot_raw_fops);
8897 	}
8898 #endif
8899 }
8900 
8901 #ifdef CONFIG_FTRACE_SELFTEST
8902 /* Let selftest have access to static functions in this file */
8903 #include "trace_selftest.c"
8904 #endif
8905 
8906 static ssize_t
8907 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8908 			loff_t *ppos)
8909 {
8910 	struct trace_option_dentry *topt = filp->private_data;
8911 	char *buf;
8912 
8913 	if (topt->flags->val & topt->opt->bit)
8914 		buf = "1\n";
8915 	else
8916 		buf = "0\n";
8917 
8918 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8919 }
8920 
8921 static ssize_t
8922 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8923 			 loff_t *ppos)
8924 {
8925 	struct trace_option_dentry *topt = filp->private_data;
8926 	unsigned long val;
8927 	int ret;
8928 
8929 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8930 	if (ret)
8931 		return ret;
8932 
8933 	if (val != 0 && val != 1)
8934 		return -EINVAL;
8935 
8936 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8937 		mutex_lock(&trace_types_lock);
8938 		ret = __set_tracer_option(topt->tr, topt->flags,
8939 					  topt->opt, !val);
8940 		mutex_unlock(&trace_types_lock);
8941 		if (ret)
8942 			return ret;
8943 	}
8944 
8945 	*ppos += cnt;
8946 
8947 	return cnt;
8948 }
8949 
8950 static int tracing_open_options(struct inode *inode, struct file *filp)
8951 {
8952 	struct trace_option_dentry *topt = inode->i_private;
8953 	int ret;
8954 
8955 	ret = tracing_check_open_get_tr(topt->tr);
8956 	if (ret)
8957 		return ret;
8958 
8959 	filp->private_data = inode->i_private;
8960 	return 0;
8961 }
8962 
8963 static int tracing_release_options(struct inode *inode, struct file *file)
8964 {
8965 	struct trace_option_dentry *topt = file->private_data;
8966 
8967 	trace_array_put(topt->tr);
8968 	return 0;
8969 }
8970 
8971 static const struct file_operations trace_options_fops = {
8972 	.open = tracing_open_options,
8973 	.read = trace_options_read,
8974 	.write = trace_options_write,
8975 	.llseek	= generic_file_llseek,
8976 	.release = tracing_release_options,
8977 };
8978 
8979 /*
8980  * In order to pass in both the trace_array descriptor and the index of
8981  * the flag that the trace option file represents, the trace_array
8982  * has a character array of trace_flags_index[], which holds the index
8983  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8984  * The address of this character array is passed to the flag option file
8985  * read/write callbacks.
8986  *
8987  * In order to extract both the index and the trace_array descriptor,
8988  * get_tr_index() uses the following algorithm.
8989  *
8990  *   idx = *ptr;
8991  *
8992  * Dereferencing the pointer yields the index itself, because each slot
8993  * of the array holds its own index (remember index[1] == 1).
8994  *
8995  * Then, to get the trace_array descriptor, subtract that index from the
8996  * pointer, which lands on the start of the trace_flags_index[] array:
8997  *
8998  *   ptr - idx == &index[0]
8999  *
9000  * Then a simple container_of() from that pointer gets us to the
9001  * trace_array descriptor.
9002  */
9003 static void get_tr_index(void *data, struct trace_array **ptr,
9004 			 unsigned int *pindex)
9005 {
9006 	*pindex = *(unsigned char *)data;
9007 
9008 	*ptr = container_of(data - *pindex, struct trace_array,
9009 			    trace_flags_index);
9010 }
9011 
9012 static ssize_t
9013 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9014 			loff_t *ppos)
9015 {
9016 	void *tr_index = filp->private_data;
9017 	struct trace_array *tr;
9018 	unsigned int index;
9019 	char *buf;
9020 
9021 	get_tr_index(tr_index, &tr, &index);
9022 
9023 	if (tr->trace_flags & (1 << index))
9024 		buf = "1\n";
9025 	else
9026 		buf = "0\n";
9027 
9028 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9029 }
9030 
9031 static ssize_t
9032 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9033 			 loff_t *ppos)
9034 {
9035 	void *tr_index = filp->private_data;
9036 	struct trace_array *tr;
9037 	unsigned int index;
9038 	unsigned long val;
9039 	int ret;
9040 
9041 	get_tr_index(tr_index, &tr, &index);
9042 
9043 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9044 	if (ret)
9045 		return ret;
9046 
9047 	if (val != 0 && val != 1)
9048 		return -EINVAL;
9049 
9050 	mutex_lock(&event_mutex);
9051 	mutex_lock(&trace_types_lock);
9052 	ret = set_tracer_flag(tr, 1 << index, val);
9053 	mutex_unlock(&trace_types_lock);
9054 	mutex_unlock(&event_mutex);
9055 
9056 	if (ret < 0)
9057 		return ret;
9058 
9059 	*ppos += cnt;
9060 
9061 	return cnt;
9062 }
9063 
9064 static const struct file_operations trace_options_core_fops = {
9065 	.open = tracing_open_generic,
9066 	.read = trace_options_core_read,
9067 	.write = trace_options_core_write,
9068 	.llseek = generic_file_llseek,
9069 };
9070 
9071 struct dentry *trace_create_file(const char *name,
9072 				 umode_t mode,
9073 				 struct dentry *parent,
9074 				 void *data,
9075 				 const struct file_operations *fops)
9076 {
9077 	struct dentry *ret;
9078 
9079 	ret = tracefs_create_file(name, mode, parent, data, fops);
9080 	if (!ret)
9081 		pr_warn("Could not create tracefs '%s' entry\n", name);
9082 
9083 	return ret;
9084 }
9085 
9086 
9087 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9088 {
9089 	struct dentry *d_tracer;
9090 
9091 	if (tr->options)
9092 		return tr->options;
9093 
9094 	d_tracer = tracing_get_dentry(tr);
9095 	if (IS_ERR(d_tracer))
9096 		return NULL;
9097 
9098 	tr->options = tracefs_create_dir("options", d_tracer);
9099 	if (!tr->options) {
9100 		pr_warn("Could not create tracefs directory 'options'\n");
9101 		return NULL;
9102 	}
9103 
9104 	return tr->options;
9105 }
9106 
9107 static void
9108 create_trace_option_file(struct trace_array *tr,
9109 			 struct trace_option_dentry *topt,
9110 			 struct tracer_flags *flags,
9111 			 struct tracer_opt *opt)
9112 {
9113 	struct dentry *t_options;
9114 
9115 	t_options = trace_options_init_dentry(tr);
9116 	if (!t_options)
9117 		return;
9118 
9119 	topt->flags = flags;
9120 	topt->opt = opt;
9121 	topt->tr = tr;
9122 
9123 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9124 					t_options, topt, &trace_options_fops);
9125 
9126 }
9127 
9128 static void
9129 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9130 {
9131 	struct trace_option_dentry *topts;
9132 	struct trace_options *tr_topts;
9133 	struct tracer_flags *flags;
9134 	struct tracer_opt *opts;
9135 	int cnt;
9136 	int i;
9137 
9138 	if (!tracer)
9139 		return;
9140 
9141 	flags = tracer->flags;
9142 
9143 	if (!flags || !flags->opts)
9144 		return;
9145 
9146 	/*
9147 	 * If this is an instance, only create flags for tracers
9148 	 * the instance may have.
9149 	 */
9150 	if (!trace_ok_for_array(tracer, tr))
9151 		return;
9152 
9153 	for (i = 0; i < tr->nr_topts; i++) {
9154 		/* Make sure there are no duplicate flags. */
9155 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9156 			return;
9157 	}
9158 
9159 	opts = flags->opts;
9160 
9161 	for (cnt = 0; opts[cnt].name; cnt++)
9162 		;
9163 
9164 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9165 	if (!topts)
9166 		return;
9167 
9168 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9169 			    GFP_KERNEL);
9170 	if (!tr_topts) {
9171 		kfree(topts);
9172 		return;
9173 	}
9174 
9175 	tr->topts = tr_topts;
9176 	tr->topts[tr->nr_topts].tracer = tracer;
9177 	tr->topts[tr->nr_topts].topts = topts;
9178 	tr->nr_topts++;
9179 
9180 	for (cnt = 0; opts[cnt].name; cnt++) {
9181 		create_trace_option_file(tr, &topts[cnt], flags,
9182 					 &opts[cnt]);
9183 		MEM_FAIL(topts[cnt].entry == NULL,
9184 			  "Failed to create trace option: %s",
9185 			  opts[cnt].name);
9186 	}
9187 }
9188 
9189 static struct dentry *
9190 create_trace_option_core_file(struct trace_array *tr,
9191 			      const char *option, long index)
9192 {
9193 	struct dentry *t_options;
9194 
9195 	t_options = trace_options_init_dentry(tr);
9196 	if (!t_options)
9197 		return NULL;
9198 
9199 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9200 				 (void *)&tr->trace_flags_index[index],
9201 				 &trace_options_core_fops);
9202 }
9203 
9204 static void create_trace_options_dir(struct trace_array *tr)
9205 {
9206 	struct dentry *t_options;
9207 	bool top_level = tr == &global_trace;
9208 	int i;
9209 
9210 	t_options = trace_options_init_dentry(tr);
9211 	if (!t_options)
9212 		return;
9213 
9214 	for (i = 0; trace_options[i]; i++) {
9215 		if (top_level ||
9216 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9217 			create_trace_option_core_file(tr, trace_options[i], i);
9218 	}
9219 }
9220 
9221 static ssize_t
9222 rb_simple_read(struct file *filp, char __user *ubuf,
9223 	       size_t cnt, loff_t *ppos)
9224 {
9225 	struct trace_array *tr = filp->private_data;
9226 	char buf[64];
9227 	int r;
9228 
9229 	r = tracer_tracing_is_on(tr);
9230 	r = sprintf(buf, "%d\n", r);
9231 
9232 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9233 }
9234 
9235 static ssize_t
9236 rb_simple_write(struct file *filp, const char __user *ubuf,
9237 		size_t cnt, loff_t *ppos)
9238 {
9239 	struct trace_array *tr = filp->private_data;
9240 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9241 	unsigned long val;
9242 	int ret;
9243 
9244 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9245 	if (ret)
9246 		return ret;
9247 
9248 	if (buffer) {
9249 		mutex_lock(&trace_types_lock);
9250 		if (!!val == tracer_tracing_is_on(tr)) {
9251 			val = 0; /* do nothing */
9252 		} else if (val) {
9253 			tracer_tracing_on(tr);
9254 			if (tr->current_trace->start)
9255 				tr->current_trace->start(tr);
9256 		} else {
9257 			tracer_tracing_off(tr);
9258 			if (tr->current_trace->stop)
9259 				tr->current_trace->stop(tr);
9260 			/* Wake up any waiters */
9261 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9262 		}
9263 		mutex_unlock(&trace_types_lock);
9264 	}
9265 
9266 	(*ppos)++;
9267 
9268 	return cnt;
9269 }
9270 
9271 static const struct file_operations rb_simple_fops = {
9272 	.open		= tracing_open_generic_tr,
9273 	.read		= rb_simple_read,
9274 	.write		= rb_simple_write,
9275 	.release	= tracing_release_generic_tr,
9276 	.llseek		= default_llseek,
9277 };
9278 
9279 static ssize_t
9280 buffer_percent_read(struct file *filp, char __user *ubuf,
9281 		    size_t cnt, loff_t *ppos)
9282 {
9283 	struct trace_array *tr = filp->private_data;
9284 	char buf[64];
9285 	int r;
9286 
9287 	r = tr->buffer_percent;
9288 	r = sprintf(buf, "%d\n", r);
9289 
9290 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9291 }
9292 
9293 static ssize_t
9294 buffer_percent_write(struct file *filp, const char __user *ubuf,
9295 		     size_t cnt, loff_t *ppos)
9296 {
9297 	struct trace_array *tr = filp->private_data;
9298 	unsigned long val;
9299 	int ret;
9300 
9301 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9302 	if (ret)
9303 		return ret;
9304 
9305 	if (val > 100)
9306 		return -EINVAL;
9307 
9308 	tr->buffer_percent = val;
9309 
9310 	(*ppos)++;
9311 
9312 	return cnt;
9313 }
9314 
9315 static const struct file_operations buffer_percent_fops = {
9316 	.open		= tracing_open_generic_tr,
9317 	.read		= buffer_percent_read,
9318 	.write		= buffer_percent_write,
9319 	.release	= tracing_release_generic_tr,
9320 	.llseek		= default_llseek,
9321 };
9322 
9323 static ssize_t
9324 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9325 {
9326 	struct trace_array *tr = filp->private_data;
9327 	size_t size;
9328 	char buf[64];
9329 	int order;
9330 	int r;
9331 
9332 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9333 	size = (PAGE_SIZE << order) / 1024;
9334 
9335 	r = sprintf(buf, "%zd\n", size);
9336 
9337 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9338 }
9339 
9340 static ssize_t
9341 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9342 			 size_t cnt, loff_t *ppos)
9343 {
9344 	struct trace_array *tr = filp->private_data;
9345 	unsigned long val;
9346 	int old_order;
9347 	int order;
9348 	int pages;
9349 	int ret;
9350 
9351 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9352 	if (ret)
9353 		return ret;
9354 
9355 	val *= 1024; /* value passed in is in KB */
9356 
9357 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9358 	order = fls(pages - 1);
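
	/*
	 * Worked example (illustrative, assuming 4K pages): writing "8"
	 * gives val = 8192 -> pages = 2 -> order = fls(1) = 1, i.e.
	 * two-page (8K) sub-buffers; writing "4" gives pages = 1 ->
	 * order = fls(0) = 0, the default single-page sub-buffer.
	 */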
9359 
9360 	/* limit between 1 and 128 system pages */
9361 	if (order < 0 || order > 7)
9362 		return -EINVAL;
9363 
9364 	/* Do not allow tracing while changing the order of the ring buffer */
9365 	tracing_stop_tr(tr);
9366 
9367 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9368 	if (old_order == order)
9369 		goto out;
9370 
9371 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9372 	if (ret)
9373 		goto out;
9374 
9375 #ifdef CONFIG_TRACER_MAX_TRACE
9376 
9377 	if (!tr->allocated_snapshot)
9378 		goto out_max;
9379 
9380 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9381 	if (ret) {
9382 		/* Put back the old order */
9383 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9384 		if (WARN_ON_ONCE(cnt)) {
9385 			/*
9386 			 * AARGH! We are left with different orders!
9387 			 * The max buffer is our "snapshot" buffer.
9388 			 * When a tracer needs a snapshot (one of the
9389 			 * latency tracers), it swaps the max buffer
9390 			 * with the saved snapshot. We succeeded in updating
9391 			 * the order of the main buffer, but failed to update
9392 			 * the order of the max buffer. And when we tried to
9393 			 * reset the main buffer to its original order, we
9394 			 * failed there too. This is very unlikely to
9395 			 * happen, but if it does, warn and kill all
9396 			 * tracing.
9397 			 */
9398 			tracing_disabled = 1;
9399 		}
9400 		goto out;
9401 	}
9402  out_max:
9403 #endif
9404 	(*ppos)++;
9405  out:
9406 	if (ret)
9407 		cnt = ret;
9408 	tracing_start_tr(tr);
9409 	return cnt;
9410 }
9411 
9412 static const struct file_operations buffer_subbuf_size_fops = {
9413 	.open		= tracing_open_generic_tr,
9414 	.read		= buffer_subbuf_size_read,
9415 	.write		= buffer_subbuf_size_write,
9416 	.release	= tracing_release_generic_tr,
9417 	.llseek		= default_llseek,
9418 };
9419 
9420 static struct dentry *trace_instance_dir;
9421 
9422 static void
9423 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9424 
9425 #ifdef CONFIG_MODULES
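/*
 * Compute the text address delta for @mod against the entry recorded in
 * the persistent scratch area, so that addresses saved in a previous boot
 * can be decoded. A module that is going away gets a delta of zero.
 */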
9426 static int make_mod_delta(struct module *mod, void *data)
9427 {
9428 	struct trace_module_delta *module_delta;
9429 	struct trace_scratch *tscratch;
9430 	struct trace_mod_entry *entry;
9431 	struct trace_array *tr = data;
9432 	int i;
9433 
9434 	tscratch = tr->scratch;
9435 	module_delta = READ_ONCE(tr->module_delta);
9436 	for (i = 0; i < tscratch->nr_entries; i++) {
9437 		entry = &tscratch->entries[i];
9438 		if (strcmp(mod->name, entry->mod_name))
9439 			continue;
9440 		if (mod->state == MODULE_STATE_GOING)
9441 			module_delta->delta[i] = 0;
9442 		else
9443 			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9444 						 - entry->mod_addr;
9445 		break;
9446 	}
9447 	return 0;
9448 }
9449 #else
9450 static int make_mod_delta(struct module *mod, void *data)
9451 {
9452 	return 0;
9453 }
9454 #endif
9455 
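/* sort_r() comparator: order trace_mod_entry records by module address */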
9456 static int mod_addr_comp(const void *a, const void *b, const void *data)
9457 {
9458 	const struct trace_mod_entry *e1 = a;
9459 	const struct trace_mod_entry *e2 = b;
9460 
9461 	return e1->mod_addr > e2->mod_addr ? 1 : -1;
9462 }
9463 
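/*
 * Validate the persistent scratch area from a previous boot: record the
 * kernel text delta, sanity check the saved module names, sort the module
 * entries by address and build the per-module delta table. If the data
 * looks corrupted, the scratch area is cleared.
 */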
9464 static void setup_trace_scratch(struct trace_array *tr,
9465 				struct trace_scratch *tscratch, unsigned int size)
9466 {
9467 	struct trace_module_delta *module_delta;
9468 	struct trace_mod_entry *entry;
9469 	int i, nr_entries;
9470 
9471 	if (!tscratch)
9472 		return;
9473 
9474 	tr->scratch = tscratch;
9475 	tr->scratch_size = size;
9476 
9477 	if (tscratch->text_addr)
9478 		tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9479 
9480 	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9481 		goto reset;
9482 
9483 	/* Check if each module name is a valid string */
9484 	for (i = 0; i < tscratch->nr_entries; i++) {
9485 		int n;
9486 
9487 		entry = &tscratch->entries[i];
9488 
9489 		for (n = 0; n < MODULE_NAME_LEN; n++) {
9490 			if (entry->mod_name[n] == '\0')
9491 				break;
9492 			if (!isprint(entry->mod_name[n]))
9493 				goto reset;
9494 		}
9495 		if (n == MODULE_NAME_LEN)
9496 			goto reset;
9497 	}
9498 
9499 	/* Sort the entries so that we can find the appropriate module from an address. */
9500 	nr_entries = tscratch->nr_entries;
9501 	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9502 	       mod_addr_comp, NULL, NULL);
9503 
9504 	if (IS_ENABLED(CONFIG_MODULES)) {
9505 		module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9506 		if (!module_delta) {
9507 			pr_info("module_delta allocation failed. Not able to decode module address.\n");
9508 			goto reset;
9509 		}
9510 		init_rcu_head(&module_delta->rcu);
9511 	} else
9512 		module_delta = NULL;
9513 	WRITE_ONCE(tr->module_delta, module_delta);
9514 
9515 	/* Scan modules to make text delta for modules. */
9516 	module_for_each_mod(make_mod_delta, tr);
9517 	return;
9518  reset:
9519 	/* Invalid trace modules */
9520 	memset(tscratch, 0, size);
9521 }
9522 
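/*
 * Allocate the ring buffer for @buf: either a normal buffer or, when a
 * physical range is set, a memory mapped buffer with a scratch area used
 * to persist module information across boots. Also allocates the per-CPU
 * trace data.
 */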
9523 static int
9524 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9525 {
9526 	enum ring_buffer_flags rb_flags;
9527 	struct trace_scratch *tscratch;
9528 	unsigned int scratch_size = 0;
9529 
9530 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9531 
9532 	buf->tr = tr;
9533 
9534 	if (tr->range_addr_start && tr->range_addr_size) {
9535 		/* Add scratch buffer to handle 128 modules */
9536 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9537 						      tr->range_addr_start,
9538 						      tr->range_addr_size,
9539 						      struct_size(tscratch, entries, 128));
9540 
9541 		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9542 		setup_trace_scratch(tr, tscratch, scratch_size);
9543 
9544 		/*
9545 		 * This is basically the same as a mapped buffer,
9546 		 * with the same restrictions.
9547 		 */
9548 		tr->mapped++;
9549 	} else {
9550 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9551 	}
9552 	if (!buf->buffer)
9553 		return -ENOMEM;
9554 
9555 	buf->data = alloc_percpu(struct trace_array_cpu);
9556 	if (!buf->data) {
9557 		ring_buffer_free(buf->buffer);
9558 		buf->buffer = NULL;
9559 		return -ENOMEM;
9560 	}
9561 
9562 	/* Allocate the first page for all buffers */
9563 	set_buffer_entries(&tr->array_buffer,
9564 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9565 
9566 	return 0;
9567 }
9568 
9569 static void free_trace_buffer(struct array_buffer *buf)
9570 {
9571 	if (buf->buffer) {
9572 		ring_buffer_free(buf->buffer);
9573 		buf->buffer = NULL;
9574 		free_percpu(buf->data);
9575 		buf->data = NULL;
9576 	}
9577 }
9578 
9579 static int allocate_trace_buffers(struct trace_array *tr, int size)
9580 {
9581 	int ret;
9582 
9583 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9584 	if (ret)
9585 		return ret;
9586 
9587 #ifdef CONFIG_TRACER_MAX_TRACE
9588 	/* Fixed memory mapped buffer trace arrays do not have snapshot buffers */
9589 	if (tr->range_addr_start)
9590 		return 0;
9591 
9592 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9593 				    allocate_snapshot ? size : 1);
9594 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9595 		free_trace_buffer(&tr->array_buffer);
9596 		return -ENOMEM;
9597 	}
9598 	tr->allocated_snapshot = allocate_snapshot;
9599 
9600 	allocate_snapshot = false;
9601 #endif
9602 
9603 	return 0;
9604 }
9605 
9606 static void free_trace_buffers(struct trace_array *tr)
9607 {
9608 	if (!tr)
9609 		return;
9610 
9611 	free_trace_buffer(&tr->array_buffer);
9612 	kfree(tr->module_delta);
9613 
9614 #ifdef CONFIG_TRACER_MAX_TRACE
9615 	free_trace_buffer(&tr->max_buffer);
9616 #endif
9617 
9618 	if (tr->range_addr_start)
9619 		vunmap((void *)tr->range_addr_start);
9620 }
9621 
9622 static void init_trace_flags_index(struct trace_array *tr)
9623 {
9624 	int i;
9625 
9626 	/* Used by the trace options files */
9627 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9628 		tr->trace_flags_index[i] = i;
9629 }
9630 
9631 static void __update_tracer_options(struct trace_array *tr)
9632 {
9633 	struct tracer *t;
9634 
9635 	for (t = trace_types; t; t = t->next)
9636 		add_tracer_options(tr, t);
9637 }
9638 
9639 static void update_tracer_options(struct trace_array *tr)
9640 {
9641 	mutex_lock(&trace_types_lock);
9642 	tracer_options_updated = true;
9643 	__update_tracer_options(tr);
9644 	mutex_unlock(&trace_types_lock);
9645 }
9646 
9647 /* Must have trace_types_lock held */
9648 struct trace_array *trace_array_find(const char *instance)
9649 {
9650 	struct trace_array *tr, *found = NULL;
9651 
9652 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9653 		if (tr->name && strcmp(tr->name, instance) == 0) {
9654 			found = tr;
9655 			break;
9656 		}
9657 	}
9658 
9659 	return found;
9660 }
9661 
9662 struct trace_array *trace_array_find_get(const char *instance)
9663 {
9664 	struct trace_array *tr;
9665 
9666 	mutex_lock(&trace_types_lock);
9667 	tr = trace_array_find(instance);
9668 	if (tr)
9669 		tr->ref++;
9670 	mutex_unlock(&trace_types_lock);
9671 
9672 	return tr;
9673 }
9674 
9675 static int trace_array_create_dir(struct trace_array *tr)
9676 {
9677 	int ret;
9678 
9679 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9680 	if (!tr->dir)
9681 		return -EINVAL;
9682 
9683 	ret = event_trace_add_tracer(tr->dir, tr);
9684 	if (ret) {
9685 		tracefs_remove(tr->dir);
9686 		return ret;
9687 	}
9688 
9689 	init_tracer_tracefs(tr, tr->dir);
9690 	__update_tracer_options(tr);
9691 
9692 	return ret;
9693 }
9694 
9695 static struct trace_array *
9696 trace_array_create_systems(const char *name, const char *systems,
9697 			   unsigned long range_addr_start,
9698 			   unsigned long range_addr_size)
9699 {
9700 	struct trace_array *tr;
9701 	int ret;
9702 
9703 	ret = -ENOMEM;
9704 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9705 	if (!tr)
9706 		return ERR_PTR(ret);
9707 
9708 	tr->name = kstrdup(name, GFP_KERNEL);
9709 	if (!tr->name)
9710 		goto out_free_tr;
9711 
9712 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9713 		goto out_free_tr;
9714 
9715 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9716 		goto out_free_tr;
9717 
9718 	if (systems) {
9719 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9720 		if (!tr->system_names)
9721 			goto out_free_tr;
9722 	}
9723 
9724 	/* Only for boot up memory mapped ring buffers */
9725 	tr->range_addr_start = range_addr_start;
9726 	tr->range_addr_size = range_addr_size;
9727 
9728 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9729 
9730 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9731 
9732 	raw_spin_lock_init(&tr->start_lock);
9733 
9734 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9735 #ifdef CONFIG_TRACER_MAX_TRACE
9736 	spin_lock_init(&tr->snapshot_trigger_lock);
9737 #endif
9738 	tr->current_trace = &nop_trace;
9739 
9740 	INIT_LIST_HEAD(&tr->systems);
9741 	INIT_LIST_HEAD(&tr->events);
9742 	INIT_LIST_HEAD(&tr->hist_vars);
9743 	INIT_LIST_HEAD(&tr->err_log);
9744 
9745 #ifdef CONFIG_MODULES
9746 	INIT_LIST_HEAD(&tr->mod_events);
9747 #endif
9748 
9749 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9750 		goto out_free_tr;
9751 
9752 	/* The ring buffer is expanded by default */
9753 	trace_set_ring_buffer_expanded(tr);
9754 
9755 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9756 		goto out_free_tr;
9757 
9758 	ftrace_init_trace_array(tr);
9759 
9760 	init_trace_flags_index(tr);
9761 
9762 	if (trace_instance_dir) {
9763 		ret = trace_array_create_dir(tr);
9764 		if (ret)
9765 			goto out_free_tr;
9766 	} else
9767 		__trace_early_add_events(tr);
9768 
9769 	list_add(&tr->list, &ftrace_trace_arrays);
9770 
9771 	tr->ref++;
9772 
9773 	return tr;
9774 
9775  out_free_tr:
9776 	ftrace_free_ftrace_ops(tr);
9777 	free_trace_buffers(tr);
9778 	free_cpumask_var(tr->pipe_cpumask);
9779 	free_cpumask_var(tr->tracing_cpumask);
9780 	kfree_const(tr->system_names);
9781 	kfree(tr->range_name);
9782 	kfree(tr->name);
9783 	kfree(tr);
9784 
9785 	return ERR_PTR(ret);
9786 }
9787 
9788 static struct trace_array *trace_array_create(const char *name)
9789 {
9790 	return trace_array_create_systems(name, NULL, 0, 0);
9791 }
9792 
9793 static int instance_mkdir(const char *name)
9794 {
9795 	struct trace_array *tr;
9796 	int ret;
9797 
9798 	guard(mutex)(&event_mutex);
9799 	guard(mutex)(&trace_types_lock);
9800 
9801 	ret = -EEXIST;
9802 	if (trace_array_find(name))
9803 		return -EEXIST;
9804 
9805 	tr = trace_array_create(name);
9806 
9807 	ret = PTR_ERR_OR_ZERO(tr);
9808 
9809 	return ret;
9810 }
9811 
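/*
 * Map the reserved physical range [start, start + size) into the kernel
 * virtual address space with vmap(). Returns the virtual address, or 0 on
 * failure.
 */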
9812 static u64 map_pages(u64 start, u64 size)
9813 {
9814 	struct page **pages;
9815 	phys_addr_t page_start;
9816 	unsigned int page_count;
9817 	unsigned int i;
9818 	void *vaddr;
9819 
9820 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9821 
9822 	page_start = start;
9823 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9824 	if (!pages)
9825 		return 0;
9826 
9827 	for (i = 0; i < page_count; i++) {
9828 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9829 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9830 	}
9831 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9832 	kfree(pages);
9833 
9834 	return (u64)(unsigned long)vaddr;
9835 }
9836 
9837 /**
9838  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9839  * @name: The name of the trace array to be looked up/created.
9840  * @systems: A list of systems to create event directories for (NULL for all)
9841  *
9842  * Returns a pointer to the trace array with the given name, or
9843  * NULL if it cannot be created.
9844  *
9845  * NOTE: This function increments the reference counter associated with the
9846  * trace array returned. This makes sure it cannot be freed while in use.
9847  * Use trace_array_put() once the trace array is no longer needed.
9848  * If the trace_array is to be freed, trace_array_destroy() needs to
9849  * be called after the trace_array_put(), or simply let user space delete
9850  * it from the tracefs instances directory. But until the
9851  * trace_array_put() is called, user space can not delete it.
9852  *
9853  */
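 * A minimal usage sketch (the instance name here is purely illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	... use the instance, e.g. enable events on it ...
 *	trace_array_put(tr);
 *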
9854 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9855 {
9856 	struct trace_array *tr;
9857 
9858 	guard(mutex)(&event_mutex);
9859 	guard(mutex)(&trace_types_lock);
9860 
9861 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9862 		if (tr->name && strcmp(tr->name, name) == 0) {
9863 			tr->ref++;
9864 			return tr;
9865 		}
9866 	}
9867 
9868 	tr = trace_array_create_systems(name, systems, 0, 0);
9869 
9870 	if (IS_ERR(tr))
9871 		tr = NULL;
9872 	else
9873 		tr->ref++;
9874 
9875 	return tr;
9876 }
9877 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9878 
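/*
 * Tear down an instance. The caller must hold event_mutex and
 * trace_types_lock. Returns -EBUSY if the instance is still in use.
 */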
9879 static int __remove_instance(struct trace_array *tr)
9880 {
9881 	int i;
9882 
9883 	/* Reference counter for a newly created trace array = 1. */
9884 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9885 		return -EBUSY;
9886 
9887 	list_del(&tr->list);
9888 
9889 	/* Disable all the flags that were enabled coming in */
9890 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9891 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9892 			set_tracer_flag(tr, 1 << i, 0);
9893 	}
9894 
9895 	if (printk_trace == tr)
9896 		update_printk_trace(&global_trace);
9897 
9898 	tracing_set_nop(tr);
9899 	clear_ftrace_function_probes(tr);
9900 	event_trace_del_tracer(tr);
9901 	ftrace_clear_pids(tr);
9902 	ftrace_destroy_function_files(tr);
9903 	tracefs_remove(tr->dir);
9904 	free_percpu(tr->last_func_repeats);
9905 	free_trace_buffers(tr);
9906 	clear_tracing_err_log(tr);
9907 
9908 	if (tr->range_name) {
9909 		reserve_mem_release_by_name(tr->range_name);
9910 		kfree(tr->range_name);
9911 	}
9912 
9913 	for (i = 0; i < tr->nr_topts; i++) {
9914 		kfree(tr->topts[i].topts);
9915 	}
9916 	kfree(tr->topts);
9917 
9918 	free_cpumask_var(tr->pipe_cpumask);
9919 	free_cpumask_var(tr->tracing_cpumask);
9920 	kfree_const(tr->system_names);
9921 	kfree(tr->name);
9922 	kfree(tr);
9923 
9924 	return 0;
9925 }
9926 
9927 int trace_array_destroy(struct trace_array *this_tr)
9928 {
9929 	struct trace_array *tr;
9930 
9931 	if (!this_tr)
9932 		return -EINVAL;
9933 
9934 	guard(mutex)(&event_mutex);
9935 	guard(mutex)(&trace_types_lock);
9936 
9937 
9938 	/* Making sure trace array exists before destroying it. */
9939 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9940 		if (tr == this_tr)
9941 			return __remove_instance(tr);
9942 	}
9943 
9944 	return -ENODEV;
9945 }
9946 EXPORT_SYMBOL_GPL(trace_array_destroy);
9947 
9948 static int instance_rmdir(const char *name)
9949 {
9950 	struct trace_array *tr;
9951 
9952 	guard(mutex)(&event_mutex);
9953 	guard(mutex)(&trace_types_lock);
9954 
9955 	tr = trace_array_find(name);
9956 	if (!tr)
9957 		return -ENODEV;
9958 
9959 	return __remove_instance(tr);
9960 }
9961 
9962 static __init void create_trace_instances(struct dentry *d_tracer)
9963 {
9964 	struct trace_array *tr;
9965 
9966 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9967 							 instance_mkdir,
9968 							 instance_rmdir);
9969 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9970 		return;
9971 
9972 	guard(mutex)(&event_mutex);
9973 	guard(mutex)(&trace_types_lock);
9974 
9975 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9976 		if (!tr->name)
9977 			continue;
9978 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9979 			     "Failed to create instance directory\n"))
9980 			return;
9981 	}
9982 }
9983 
9984 static void
9985 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9986 {
9987 	int cpu;
9988 
9989 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9990 			tr, &show_traces_fops);
9991 
9992 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9993 			tr, &set_tracer_fops);
9994 
9995 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9996 			  tr, &tracing_cpumask_fops);
9997 
9998 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9999 			  tr, &tracing_iter_fops);
10000 
10001 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10002 			  tr, &tracing_fops);
10003 
10004 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10005 			  tr, &tracing_pipe_fops);
10006 
10007 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10008 			  tr, &tracing_entries_fops);
10009 
10010 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10011 			  tr, &tracing_total_entries_fops);
10012 
10013 	trace_create_file("free_buffer", 0200, d_tracer,
10014 			  tr, &tracing_free_buffer_fops);
10015 
10016 	trace_create_file("trace_marker", 0220, d_tracer,
10017 			  tr, &tracing_mark_fops);
10018 
10019 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10020 
10021 	trace_create_file("trace_marker_raw", 0220, d_tracer,
10022 			  tr, &tracing_mark_raw_fops);
10023 
10024 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10025 			  &trace_clock_fops);
10026 
10027 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10028 			  tr, &rb_simple_fops);
10029 
10030 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10031 			  &trace_time_stamp_mode_fops);
10032 
10033 	tr->buffer_percent = 50;
10034 
10035 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10036 			tr, &buffer_percent_fops);
10037 
10038 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10039 			  tr, &buffer_subbuf_size_fops);
10040 
10041 	create_trace_options_dir(tr);
10042 
10043 #ifdef CONFIG_TRACER_MAX_TRACE
10044 	trace_create_maxlat_file(tr, d_tracer);
10045 #endif
10046 
10047 	if (ftrace_create_function_files(tr, d_tracer))
10048 		MEM_FAIL(1, "Could not allocate function filter files");
10049 
10050 	if (tr->range_addr_start) {
10051 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10052 				  tr, &last_boot_fops);
10053 #ifdef CONFIG_TRACER_SNAPSHOT
10054 	} else {
10055 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10056 				  tr, &snapshot_fops);
10057 #endif
10058 	}
10059 
10060 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10061 			  tr, &tracing_err_log_fops);
10062 
10063 	for_each_tracing_cpu(cpu)
10064 		tracing_init_tracefs_percpu(tr, cpu);
10065 
10066 	ftrace_init_tracefs(tr, d_tracer);
10067 }
10068 
10069 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10070 {
10071 	struct vfsmount *mnt;
10072 	struct file_system_type *type;
10073 
10074 	/*
10075 	 * To maintain backward compatibility for tools that mount
10076 	 * debugfs to get to the tracing facility, tracefs is automatically
10077 	 * mounted to the debugfs/tracing directory.
10078 	 */
10079 	type = get_fs_type("tracefs");
10080 	if (!type)
10081 		return NULL;
10082 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10083 	put_filesystem(type);
10084 	if (IS_ERR(mnt))
10085 		return NULL;
10086 	mntget(mnt);
10087 
10088 	return mnt;
10089 }
10090 
10091 /**
10092  * tracing_init_dentry - initialize top level trace array
10093  *
10094  * This is called when creating files or directories in the tracing
10095  * directory. It is called via fs_initcall() by any of the boot up code
10096  * and returns 0 on success, or a negative error code on failure.
10097  */
10098 int tracing_init_dentry(void)
10099 {
10100 	struct trace_array *tr = &global_trace;
10101 
10102 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10103 		pr_warn("Tracing disabled due to lockdown\n");
10104 		return -EPERM;
10105 	}
10106 
10107 	/* The top level trace array uses NULL as parent */
10108 	if (tr->dir)
10109 		return 0;
10110 
10111 	if (WARN_ON(!tracefs_initialized()))
10112 		return -ENODEV;
10113 
10114 	/*
10115 	 * As there may still be users that expect the tracing
10116 	 * files to exist in debugfs/tracing, we must automount
10117 	 * the tracefs file system there, so older tools still
10118 	 * work with the newer kernel.
10119 	 */
10120 	tr->dir = debugfs_create_automount("tracing", NULL,
10121 					   trace_automount, NULL);
10122 
10123 	return 0;
10124 }
10125 
10126 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10127 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10128 
10129 static struct workqueue_struct *eval_map_wq __initdata;
10130 static struct work_struct eval_map_work __initdata;
10131 static struct work_struct tracerfs_init_work __initdata;
10132 
10133 static void __init eval_map_work_func(struct work_struct *work)
10134 {
10135 	int len;
10136 
10137 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10138 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10139 }
10140 
10141 static int __init trace_eval_init(void)
10142 {
10143 	INIT_WORK(&eval_map_work, eval_map_work_func);
10144 
10145 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10146 	if (!eval_map_wq) {
10147 		pr_err("Unable to allocate eval_map_wq\n");
10148 		/* Fall back to doing the work synchronously */
10149 		eval_map_work_func(&eval_map_work);
10150 		return -ENOMEM;
10151 	}
10152 
10153 	queue_work(eval_map_wq, &eval_map_work);
10154 	return 0;
10155 }
10156 
10157 subsys_initcall(trace_eval_init);
10158 
10159 static int __init trace_eval_sync(void)
10160 {
10161 	/* Make sure the eval map updates are finished */
10162 	if (eval_map_wq)
10163 		destroy_workqueue(eval_map_wq);
10164 	return 0;
10165 }
10166 
10167 late_initcall_sync(trace_eval_sync);
10168 
10169 
10170 #ifdef CONFIG_MODULES
10171 
10172 bool module_exists(const char *module)
10173 {
10174 	/* All modules have the symbol __this_module */
10175 	static const char this_mod[] = "__this_module";
10176 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10177 	unsigned long val;
10178 	int n;
10179 
10180 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10181 
10182 	if (n > sizeof(modname) - 1)
10183 		return false;
10184 
10185 	val = module_kallsyms_lookup_name(modname);
10186 	return val != 0;
10187 }
10188 
10189 static void trace_module_add_evals(struct module *mod)
10190 {
10191 	if (!mod->num_trace_evals)
10192 		return;
10193 
10194 	/*
10195 	 * Modules with bad taint do not have events created, do
10196 	 * not bother with enums either.
10197 	 */
10198 	if (trace_module_has_bad_taint(mod))
10199 		return;
10200 
10201 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10202 }
10203 
10204 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10205 static void trace_module_remove_evals(struct module *mod)
10206 {
10207 	union trace_eval_map_item *map;
10208 	union trace_eval_map_item **last = &trace_eval_maps;
10209 
10210 	if (!mod->num_trace_evals)
10211 		return;
10212 
10213 	guard(mutex)(&trace_eval_mutex);
10214 
10215 	map = trace_eval_maps;
10216 
10217 	while (map) {
10218 		if (map->head.mod == mod)
10219 			break;
10220 		map = trace_eval_jmp_to_tail(map);
10221 		last = &map->tail.next;
10222 		map = map->tail.next;
10223 	}
10224 	if (!map)
10225 		return;
10226 
10227 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10228 	kfree(map);
10229 }
10230 #else
10231 static inline void trace_module_remove_evals(struct module *mod) { }
10232 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10233 
10234 static void trace_module_record(struct module *mod, bool add)
10235 {
10236 	struct trace_array *tr;
10237 	unsigned long flags;
10238 
10239 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10240 		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10241 		/* Update any persistent trace array that has already been started */
10242 		if (flags == TRACE_ARRAY_FL_BOOT && add) {
10243 			guard(mutex)(&scratch_mutex);
10244 			save_mod(mod, tr);
10245 		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10246 			/* Update delta if the module was loaded in the previous boot */
10247 			make_mod_delta(mod, tr);
10248 		}
10249 	}
10250 }
10251 
10252 static int trace_module_notify(struct notifier_block *self,
10253 			       unsigned long val, void *data)
10254 {
10255 	struct module *mod = data;
10256 
10257 	switch (val) {
10258 	case MODULE_STATE_COMING:
10259 		trace_module_add_evals(mod);
10260 		trace_module_record(mod, true);
10261 		break;
10262 	case MODULE_STATE_GOING:
10263 		trace_module_remove_evals(mod);
10264 		trace_module_record(mod, false);
10265 		break;
10266 	}
10267 
10268 	return NOTIFY_OK;
10269 }
10270 
10271 static struct notifier_block trace_module_nb = {
10272 	.notifier_call = trace_module_notify,
10273 	.priority = 0,
10274 };
10275 #endif /* CONFIG_MODULES */
10276 
10277 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10278 {
10279 
10280 	event_trace_init();
10281 
10282 	init_tracer_tracefs(&global_trace, NULL);
10283 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10284 
10285 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10286 			&global_trace, &tracing_thresh_fops);
10287 
10288 	trace_create_file("README", TRACE_MODE_READ, NULL,
10289 			NULL, &tracing_readme_fops);
10290 
10291 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10292 			NULL, &tracing_saved_cmdlines_fops);
10293 
10294 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10295 			  NULL, &tracing_saved_cmdlines_size_fops);
10296 
10297 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10298 			NULL, &tracing_saved_tgids_fops);
10299 
10300 	trace_create_eval_file(NULL);
10301 
10302 #ifdef CONFIG_MODULES
10303 	register_module_notifier(&trace_module_nb);
10304 #endif
10305 
10306 #ifdef CONFIG_DYNAMIC_FTRACE
10307 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10308 			NULL, &tracing_dyn_info_fops);
10309 #endif
10310 
10311 	create_trace_instances(NULL);
10312 
10313 	update_tracer_options(&global_trace);
10314 }
10315 
10316 static __init int tracer_init_tracefs(void)
10317 {
10318 	int ret;
10319 
10320 	trace_access_lock_init();
10321 
10322 	ret = tracing_init_dentry();
10323 	if (ret)
10324 		return 0;
10325 
10326 	if (eval_map_wq) {
10327 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10328 		queue_work(eval_map_wq, &tracerfs_init_work);
10329 	} else {
10330 		tracer_init_tracefs_work_func(NULL);
10331 	}
10332 
10333 	rv_init_interface();
10334 
10335 	return 0;
10336 }
10337 
10338 fs_initcall(tracer_init_tracefs);
10339 
10340 static int trace_die_panic_handler(struct notifier_block *self,
10341 				unsigned long ev, void *unused);
10342 
10343 static struct notifier_block trace_panic_notifier = {
10344 	.notifier_call = trace_die_panic_handler,
10345 	.priority = INT_MAX - 1,
10346 };
10347 
10348 static struct notifier_block trace_die_notifier = {
10349 	.notifier_call = trace_die_panic_handler,
10350 	.priority = INT_MAX - 1,
10351 };
10352 
10353 /*
10354  * The idea is to execute the following die/panic callback early, in order
10355  * to avoid showing irrelevant information in the trace (like other panic
10356  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10357  * warnings get disabled (to prevent potential log flooding).
10358  */
10359 static int trace_die_panic_handler(struct notifier_block *self,
10360 				unsigned long ev, void *unused)
10361 {
10362 	if (!ftrace_dump_on_oops_enabled())
10363 		return NOTIFY_DONE;
10364 
10365 	/* The die notifier requires DIE_OOPS to trigger */
10366 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10367 		return NOTIFY_DONE;
10368 
10369 	ftrace_dump(DUMP_PARAM);
10370 
10371 	return NOTIFY_DONE;
10372 }
10373 
10374 /*
10375  * printk is set to max of 1024, we really don't need it that big.
10376  * Nothing should be printing 1000 characters anyway.
10377  */
10378 #define TRACE_MAX_PRINT		1000
10379 
10380 /*
10381  * Define here KERN_TRACE so that we have one place to modify
10382  * it if we decide to change what log level the ftrace dump
10383  * should be at.
10384  */
10385 #define KERN_TRACE		KERN_EMERG
10386 
10387 void
10388 trace_printk_seq(struct trace_seq *s)
10389 {
10390 	/* Probably should print a warning here. */
10391 	if (s->seq.len >= TRACE_MAX_PRINT)
10392 		s->seq.len = TRACE_MAX_PRINT;
10393 
10394 	/*
10395 	 * More paranoid code. Although the buffer size is set to
10396 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10397 	 * an extra layer of protection.
10398 	 */
10399 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10400 		s->seq.len = s->seq.size - 1;
10401 
10402 	/* should be zero terminated, but we are paranoid. */
10403 	s->buffer[s->seq.len] = 0;
10404 
10405 	printk(KERN_TRACE "%s", s->buffer);
10406 
10407 	trace_seq_init(s);
10408 }
10409 
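/*
 * Set up @iter to iterate over @tr's buffer. The static temp and fmt
 * buffers are used so that this can run from dump context where memory
 * allocation may not be possible.
 */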
10410 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10411 {
10412 	iter->tr = tr;
10413 	iter->trace = iter->tr->current_trace;
10414 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10415 	iter->array_buffer = &tr->array_buffer;
10416 
10417 	if (iter->trace && iter->trace->open)
10418 		iter->trace->open(iter);
10419 
10420 	/* Annotate start of buffers if we had overruns */
10421 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10422 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10423 
10424 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10425 	if (trace_clocks[iter->tr->clock_id].in_ns)
10426 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10427 
10428 	/* Can not use kmalloc for iter.temp and iter.fmt */
10429 	iter->temp = static_temp_buf;
10430 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10431 	iter->fmt = static_fmt_buf;
10432 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10433 }
10434 
10435 void trace_init_global_iter(struct trace_iterator *iter)
10436 {
10437 	trace_init_iter(iter, &global_trace);
10438 }
10439 
10440 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10441 {
10442 	/* use static because iter can be a bit big for the stack */
10443 	static struct trace_iterator iter;
10444 	unsigned int old_userobj;
10445 	unsigned long flags;
10446 	int cnt = 0, cpu;
10447 
10448 	/*
10449 	 * Always turn off tracing when we dump.
10450 	 * We don't need to show trace output of what happens
10451 	 * between multiple crashes.
10452 	 *
10453 	 * If the user does a sysrq-z, then they can re-enable
10454 	 * tracing with echo 1 > tracing_on.
10455 	 */
10456 	tracer_tracing_off(tr);
10457 
10458 	local_irq_save(flags);
10459 
10460 	/* Simulate the iterator */
10461 	trace_init_iter(&iter, tr);
10462 
10463 	for_each_tracing_cpu(cpu) {
10464 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10465 	}
10466 
10467 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10468 
10469 	/* don't look at user memory in panic mode */
10470 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10471 
10472 	if (dump_mode == DUMP_ORIG)
10473 		iter.cpu_file = raw_smp_processor_id();
10474 	else
10475 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10476 
10477 	if (tr == &global_trace)
10478 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10479 	else
10480 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10481 
10482 	/* Did function tracer already get disabled? */
10483 	if (ftrace_is_dead()) {
10484 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10485 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10486 	}
10487 
10488 	/*
10489 	 * We need to stop all tracing on all CPUs to read
10490 	 * the next buffer. This is a bit expensive, but is
10491 	 * not done often. We fill in all that we can read,
10492 	 * and then release the locks again.
10493 	 */
10494 
10495 	while (!trace_empty(&iter)) {
10496 
10497 		if (!cnt)
10498 			printk(KERN_TRACE "---------------------------------\n");
10499 
10500 		cnt++;
10501 
10502 		trace_iterator_reset(&iter);
10503 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10504 
10505 		if (trace_find_next_entry_inc(&iter) != NULL) {
10506 			int ret;
10507 
10508 			ret = print_trace_line(&iter);
10509 			if (ret != TRACE_TYPE_NO_CONSUME)
10510 				trace_consume(&iter);
10511 		}
10512 		touch_nmi_watchdog();
10513 
10514 		trace_printk_seq(&iter.seq);
10515 	}
10516 
10517 	if (!cnt)
10518 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10519 	else
10520 		printk(KERN_TRACE "---------------------------------\n");
10521 
10522 	tr->trace_flags |= old_userobj;
10523 
10524 	for_each_tracing_cpu(cpu) {
10525 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10526 	}
10527 	local_irq_restore(flags);
10528 }
10529 
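/*
 * Dump according to the ftrace_dump_on_oops string: the first token may be
 * 0 (nothing), 1 (all CPUs) or 2/orig_cpu (current CPU only) for the global
 * buffer; the remaining comma separated tokens name instances, optionally
 * suffixed with =2 or =orig_cpu.
 */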
10530 static void ftrace_dump_by_param(void)
10531 {
10532 	bool first_param = true;
10533 	char dump_param[MAX_TRACER_SIZE];
10534 	char *buf, *token, *inst_name;
10535 	struct trace_array *tr;
10536 
10537 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10538 	buf = dump_param;
10539 
10540 	while ((token = strsep(&buf, ",")) != NULL) {
10541 		if (first_param) {
10542 			first_param = false;
10543 			if (!strcmp("0", token))
10544 				continue;
10545 			else if (!strcmp("1", token)) {
10546 				ftrace_dump_one(&global_trace, DUMP_ALL);
10547 				continue;
10548 			}
10549 			else if (!strcmp("2", token) ||
10550 			  !strcmp("orig_cpu", token)) {
10551 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10552 				continue;
10553 			}
10554 		}
10555 
10556 		inst_name = strsep(&token, "=");
10557 		tr = trace_array_find(inst_name);
10558 		if (!tr) {
10559 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10560 			continue;
10561 		}
10562 
10563 		if (token && (!strcmp("2", token) ||
10564 			  !strcmp("orig_cpu", token)))
10565 			ftrace_dump_one(tr, DUMP_ORIG);
10566 		else
10567 			ftrace_dump_one(tr, DUMP_ALL);
10568 	}
10569 }
10570 
10571 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10572 {
10573 	static atomic_t dump_running;
10574 
10575 	/* Only allow one dump user at a time. */
10576 	if (atomic_inc_return(&dump_running) != 1) {
10577 		atomic_dec(&dump_running);
10578 		return;
10579 	}
10580 
10581 	switch (oops_dump_mode) {
10582 	case DUMP_ALL:
10583 		ftrace_dump_one(&global_trace, DUMP_ALL);
10584 		break;
10585 	case DUMP_ORIG:
10586 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10587 		break;
10588 	case DUMP_PARAM:
10589 		ftrace_dump_by_param();
10590 		break;
10591 	case DUMP_NONE:
10592 		break;
10593 	default:
10594 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10595 		ftrace_dump_one(&global_trace, DUMP_ALL);
10596 	}
10597 
10598 	atomic_dec(&dump_running);
10599 }
10600 EXPORT_SYMBOL_GPL(ftrace_dump);
10601 
10602 #define WRITE_BUFSIZE  4096
10603 
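/*
 * Copy a user supplied command buffer in WRITE_BUFSIZE chunks, split it
 * into newline separated commands, strip '#' comments and hand each
 * command to @createfn. Returns the number of bytes consumed or an error.
 */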
10604 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10605 				size_t count, loff_t *ppos,
10606 				int (*createfn)(const char *))
10607 {
10608 	char *kbuf, *buf, *tmp;
10609 	int ret = 0;
10610 	size_t done = 0;
10611 	size_t size;
10612 
10613 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10614 	if (!kbuf)
10615 		return -ENOMEM;
10616 
10617 	while (done < count) {
10618 		size = count - done;
10619 
10620 		if (size >= WRITE_BUFSIZE)
10621 			size = WRITE_BUFSIZE - 1;
10622 
10623 		if (copy_from_user(kbuf, buffer + done, size)) {
10624 			ret = -EFAULT;
10625 			goto out;
10626 		}
10627 		kbuf[size] = '\0';
10628 		buf = kbuf;
10629 		do {
10630 			tmp = strchr(buf, '\n');
10631 			if (tmp) {
10632 				*tmp = '\0';
10633 				size = tmp - buf + 1;
10634 			} else {
10635 				size = strlen(buf);
10636 				if (done + size < count) {
10637 					if (buf != kbuf)
10638 						break;
10639 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10640 					pr_warn("Line length is too long: Should be less than %d\n",
10641 						WRITE_BUFSIZE - 2);
10642 					ret = -EINVAL;
10643 					goto out;
10644 				}
10645 			}
10646 			done += size;
10647 
10648 			/* Remove comments */
10649 			tmp = strchr(buf, '#');
10650 
10651 			if (tmp)
10652 				*tmp = '\0';
10653 
10654 			ret = createfn(buf);
10655 			if (ret)
10656 				goto out;
10657 			buf += size;
10658 
10659 		} while (done < count);
10660 	}
10661 	ret = done;
10662 
10663 out:
10664 	kfree(kbuf);
10665 
10666 	return ret;
10667 }
10668 
10669 #ifdef CONFIG_TRACER_MAX_TRACE
10670 __init static bool tr_needs_alloc_snapshot(const char *name)
10671 {
10672 	char *test;
10673 	int len = strlen(name);
10674 	bool ret;
10675 
10676 	if (!boot_snapshot_index)
10677 		return false;
10678 
10679 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10680 	    boot_snapshot_info[len] == '\t')
10681 		return true;
10682 
10683 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10684 	if (!test)
10685 		return false;
10686 
10687 	sprintf(test, "\t%s\t", name);
10688 	ret = strstr(boot_snapshot_info, test) == NULL;
10689 	kfree(test);
10690 	return ret;
10691 }
10692 
10693 __init static void do_allocate_snapshot(const char *name)
10694 {
10695 	if (!tr_needs_alloc_snapshot(name))
10696 		return;
10697 
10698 	/*
10699 	 * When allocate_snapshot is set, the next call to
10700 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10701 	 * will allocate the snapshot buffer. That will also clear
10702 	 * this flag.
10703 	 */
10704 	allocate_snapshot = true;
10705 }
10706 #else
10707 static inline void do_allocate_snapshot(const char *name) { }
10708 #endif
10709 
10710 __init static void enable_instances(void)
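/*
 * Create the trace instances requested at boot. Each tab separated entry
 * has the form:
 *
 *	name[^flag[^flag]][@<start:size | reserve_mem name>][,event...]
 *
 * where a flag is "traceoff" or "traceprintk" (aka "printk"), and giving an
 * address/size or a reserve_mem region name makes the instance a persistent,
 * memory mapped buffer.
 */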
10711 {
10712 	struct trace_array *tr;
10713 	char *curr_str;
10714 	char *name;
10715 	char *str;
10716 	char *tok;
10717 
10718 	/* A tab is always appended */
10719 	boot_instance_info[boot_instance_index - 1] = '\0';
10720 	str = boot_instance_info;
10721 
10722 	while ((curr_str = strsep(&str, "\t"))) {
10723 		phys_addr_t start = 0;
10724 		phys_addr_t size = 0;
10725 		unsigned long addr = 0;
10726 		bool traceprintk = false;
10727 		bool traceoff = false;
10728 		char *flag_delim;
10729 		char *addr_delim;
10730 		char *rname __free(kfree) = NULL;
10731 
10732 		tok = strsep(&curr_str, ",");
10733 
10734 		flag_delim = strchr(tok, '^');
10735 		addr_delim = strchr(tok, '@');
10736 
10737 		if (addr_delim)
10738 			*addr_delim++ = '\0';
10739 
10740 		if (flag_delim)
10741 			*flag_delim++ = '\0';
10742 
10743 		name = tok;
10744 
10745 		if (flag_delim) {
10746 			char *flag;
10747 
10748 			while ((flag = strsep(&flag_delim, "^"))) {
10749 				if (strcmp(flag, "traceoff") == 0) {
10750 					traceoff = true;
10751 				} else if ((strcmp(flag, "printk") == 0) ||
10752 					   (strcmp(flag, "traceprintk") == 0) ||
10753 					   (strcmp(flag, "trace_printk") == 0)) {
10754 					traceprintk = true;
10755 				} else {
10756 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10757 						flag, name);
10758 				}
10759 			}
10760 		}
10761 
10762 		tok = addr_delim;
10763 		if (tok && isdigit(*tok)) {
10764 			start = memparse(tok, &tok);
10765 			if (!start) {
10766 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10767 					name);
10768 				continue;
10769 			}
10770 			if (*tok != ':') {
10771 				pr_warn("Tracing: No size specified for instance %s\n", name);
10772 				continue;
10773 			}
10774 			tok++;
10775 			size = memparse(tok, &tok);
10776 			if (!size) {
10777 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10778 					name);
10779 				continue;
10780 			}
10781 		} else if (tok) {
10782 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10783 				start = 0;
10784 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10785 				continue;
10786 			}
10787 			rname = kstrdup(tok, GFP_KERNEL);
10788 		}
10789 
10790 		if (start) {
10791 			addr = map_pages(start, size);
10792 			if (addr) {
10793 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10794 					name, &start, (unsigned long)size);
10795 			} else {
10796 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10797 				continue;
10798 			}
10799 		} else {
10800 			/* Only non-mapped buffers have snapshot buffers */
10801 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10802 				do_allocate_snapshot(name);
10803 		}
10804 
10805 		tr = trace_array_create_systems(name, NULL, addr, size);
10806 		if (IS_ERR(tr)) {
10807 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10808 			continue;
10809 		}
10810 
10811 		if (traceoff)
10812 			tracer_tracing_off(tr);
10813 
10814 		if (traceprintk)
10815 			update_printk_trace(tr);
10816 
10817 		/*
10818 		 * If start is set, then this is a mapped buffer, and
10819 		 * cannot be deleted by user space, so keep the reference
10820 		 * to it.
10821 		 */
10822 		if (start) {
10823 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10824 			tr->range_name = no_free_ptr(rname);
10825 		}
10826 
10827 		while ((tok = strsep(&curr_str, ","))) {
10828 			early_enable_events(tr, tok, true);
10829 		}
10830 	}
10831 }
10832 
10833 __init static int tracer_alloc_buffers(void)
10834 {
10835 	int ring_buf_size;
10836 	int ret = -ENOMEM;
10837 
10838 
10839 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10840 		pr_warn("Tracing disabled due to lockdown\n");
10841 		return -EPERM;
10842 	}
10843 
10844 	/*
10845 	 * Make sure we don't accidentally add more trace options
10846 	 * than we have bits for.
10847 	 */
10848 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10849 
10850 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10851 		goto out;
10852 
10853 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10854 		goto out_free_buffer_mask;
10855 
10856 	/* Only allocate trace_printk buffers if a trace_printk exists */
10857 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10858 		/* Must be called before global_trace.buffer is allocated */
10859 		trace_printk_init_buffers();
10860 
10861 	/* To save memory, keep the ring buffer size at its minimum */
10862 	if (global_trace.ring_buffer_expanded)
10863 		ring_buf_size = trace_buf_size;
10864 	else
10865 		ring_buf_size = 1;
10866 
10867 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10868 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10869 
10870 	raw_spin_lock_init(&global_trace.start_lock);
10871 
10872 	/*
10873 	 * The prepare callbacks allocates some memory for the ring buffer. We
10874 	 * The prepare callback allocates some memory for the ring buffer. We
10875 	 * the buffer, then the user would lose any trace that was in the
10876 	 * buffer. The memory will be removed once the "instance" is removed.
10877 	 */
10878 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10879 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10880 				      NULL);
10881 	if (ret < 0)
10882 		goto out_free_cpumask;
10883 	/* Used for event triggers */
10884 	ret = -ENOMEM;
10885 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10886 	if (!temp_buffer)
10887 		goto out_rm_hp_state;
10888 
10889 	if (trace_create_savedcmd() < 0)
10890 		goto out_free_temp_buffer;
10891 
10892 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10893 		goto out_free_savedcmd;
10894 
10895 	/* TODO: make the number of buffers hot pluggable with CPUS */
10896 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10897 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10898 		goto out_free_pipe_cpumask;
10899 	}
10900 	if (global_trace.buffer_disabled)
10901 		tracing_off();
10902 
10903 	if (trace_boot_clock) {
10904 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10905 		if (ret < 0)
10906 			pr_warn("Trace clock %s not defined, going back to default\n",
10907 				trace_boot_clock);
10908 	}
10909 
10910 	/*
10911 	 * register_tracer() might reference current_trace, so it
10912 	 * needs to be set before we register anything. This is
10913 	 * just a bootstrap of current_trace anyway.
10914 	 */
10915 	global_trace.current_trace = &nop_trace;
10916 
10917 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10918 #ifdef CONFIG_TRACER_MAX_TRACE
10919 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10920 #endif
10921 	ftrace_init_global_array_ops(&global_trace);
10922 
10923 #ifdef CONFIG_MODULES
10924 	INIT_LIST_HEAD(&global_trace.mod_events);
10925 #endif
10926 
10927 	init_trace_flags_index(&global_trace);
10928 
10929 	register_tracer(&nop_trace);
10930 
10931 	/* Function tracing may start here (via kernel command line) */
10932 	init_function_trace();
10933 
10934 	/* All seems OK, enable tracing */
10935 	tracing_disabled = 0;
10936 
10937 	atomic_notifier_chain_register(&panic_notifier_list,
10938 				       &trace_panic_notifier);
10939 
10940 	register_die_notifier(&trace_die_notifier);
10941 
10942 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10943 
10944 	INIT_LIST_HEAD(&global_trace.systems);
10945 	INIT_LIST_HEAD(&global_trace.events);
10946 	INIT_LIST_HEAD(&global_trace.hist_vars);
10947 	INIT_LIST_HEAD(&global_trace.err_log);
10948 	list_add(&global_trace.list, &ftrace_trace_arrays);
10949 
10950 	apply_trace_boot_options();
10951 
10952 	register_snapshot_cmd();
10953 
10954 	return 0;
10955 
10956 out_free_pipe_cpumask:
10957 	free_cpumask_var(global_trace.pipe_cpumask);
10958 out_free_savedcmd:
10959 	trace_free_saved_cmdlines_buffer();
10960 out_free_temp_buffer:
10961 	ring_buffer_free(temp_buffer);
10962 out_rm_hp_state:
10963 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10964 out_free_cpumask:
10965 	free_cpumask_var(global_trace.tracing_cpumask);
10966 out_free_buffer_mask:
10967 	free_cpumask_var(tracing_buffer_mask);
10968 out:
10969 	return ret;
10970 }
10971 
10972 #ifdef CONFIG_FUNCTION_TRACER
10973 /* Used to set module cached ftrace filtering at boot up */
10974 __init struct trace_array *trace_get_global_array(void)
10975 {
10976 	return &global_trace;
10977 }
10978 #endif
10979 
10980 void __init ftrace_boot_snapshot(void)
10981 {
10982 #ifdef CONFIG_TRACER_MAX_TRACE
10983 	struct trace_array *tr;
10984 
10985 	if (!snapshot_at_boot)
10986 		return;
10987 
10988 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10989 		if (!tr->allocated_snapshot)
10990 			continue;
10991 
10992 		tracing_snapshot_instance(tr);
10993 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10994 	}
10995 #endif
10996 }
10997 
10998 void __init early_trace_init(void)
10999 {
11000 	if (tracepoint_printk) {
11001 		tracepoint_print_iter =
11002 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11003 		if (MEM_FAIL(!tracepoint_print_iter,
11004 			     "Failed to allocate trace iterator\n"))
11005 			tracepoint_printk = 0;
11006 		else
11007 			static_key_enable(&tracepoint_printk_key.key);
11008 	}
11009 	tracer_alloc_buffers();
11010 
11011 	init_events();
11012 }
11013 
11014 void __init trace_init(void)
11015 {
11016 	trace_event_init();
11017 
11018 	if (boot_instance_index)
11019 		enable_instances();
11020 }
11021 
11022 __init static void clear_boot_tracer(void)
11023 {
11024 	/*
11025 	 * The buffer holding the default bootup tracer name is in an init
11026 	 * section. This function is called in lateinit. If we did not
11027 	 * find the boot tracer, then clear it out, to prevent
11028 	 * later registration from accessing the buffer that is
11029 	 * about to be freed.
11030 	 */
11031 	if (!default_bootup_tracer)
11032 		return;
11033 
11034 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11035 	       default_bootup_tracer);
11036 	default_bootup_tracer = NULL;
11037 }
11038 
11039 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11040 __init static void tracing_set_default_clock(void)
11041 {
11042 	/* sched_clock_stable() is determined in late_initcall */
11043 	if (!trace_boot_clock && !sched_clock_stable()) {
11044 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
11045 			pr_warn("Can not set tracing clock due to lockdown\n");
11046 			return;
11047 		}
11048 
11049 		printk(KERN_WARNING
11050 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
11051 		       "If you want to keep using the local clock, then add:\n"
11052 		       "  \"trace_clock=local\"\n"
11053 		       "on the kernel command line\n");
11054 		tracing_set_clock(&global_trace, "global");
11055 	}
11056 }
11057 #else
11058 static inline void tracing_set_default_clock(void) { }
11059 #endif
11060 
11061 __init static int late_trace_init(void)
11062 {
11063 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11064 		static_key_disable(&tracepoint_printk_key.key);
11065 		tracepoint_printk = 0;
11066 	}
11067 
11068 	if (traceoff_after_boot)
11069 		tracing_off();
11070 
11071 	tracing_set_default_clock();
11072 	clear_boot_tracer();
11073 	return 0;
11074 }
11075 
11076 late_initcall_sync(late_trace_init);
11077