1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58
59 #include "trace.h"
60 #include "trace_output.h"
61
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will lurk into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
 */
70 bool __read_mostly tracing_selftest_running;
71
72 /*
73 * If boot-time tracing including tracers/events via kernel cmdline
74 * is running, we do not want to run SELFTEST.
75 */
76 bool __read_mostly tracing_selftest_disabled;
77
disable_tracing_selftest(const char * reason)78 void __init disable_tracing_selftest(const char *reason)
79 {
80 if (!tracing_selftest_disabled) {
81 tracing_selftest_disabled = true;
82 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 }
84 }
85 #else
86 #define tracing_selftest_disabled 0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* Store tracers and their flags per instance */
97 struct tracers {
98 struct list_head list;
99 struct tracer *tracer;
100 struct tracer_flags *flags;
101 };
102
103 /*
104 * To prevent the comm cache from being overwritten when no
105 * tracing is active, only save the comm when a trace event
106 * occurred.
107 */
108 DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111 * Kill all tracing for good (never come back).
112 * It is initialized to 1 but will turn to zero if the initialization
113 * of the tracer is successful. But that is the only place that sets
114 * this back to zero.
115 */
116 int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly tracing_buffer_mask;
119
120 #define MAX_TRACER_SIZE 100
121 /*
122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123 *
124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125 * is set, then ftrace_dump is called. This will output the contents
126 * of the ftrace buffers to the console. This is very useful for
127 * capturing traces that lead to crashes and outputting it to a
128 * serial console.
129 *
130 * It is default off, but you can enable it with either specifying
131 * "ftrace_dump_on_oops" in the kernel command line, or setting
132 * /proc/sys/kernel/ftrace_dump_on_oops
133 * Set 1 if you want to dump buffers of all CPUs
134 * Set 2 if you want to dump the buffer of the CPU that triggered oops
135 * Set instance name if you want to dump the specific trace instance
136 * Multiple instance dump is also supported, and instances are separated
137 * by commas.
138 */
139 /* Set to string format zero to disable by default */
140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141
142 /* When set, tracing will stop when a WARN*() is hit */
143 static int __disable_trace_on_warning;
144
145 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
146 void *buffer, size_t *lenp, loff_t *ppos);
147 static const struct ctl_table trace_sysctl_table[] = {
148 {
149 .procname = "ftrace_dump_on_oops",
150 .data = &ftrace_dump_on_oops,
151 .maxlen = MAX_TRACER_SIZE,
152 .mode = 0644,
153 .proc_handler = proc_dostring,
154 },
155 {
156 .procname = "traceoff_on_warning",
157 .data = &__disable_trace_on_warning,
158 .maxlen = sizeof(__disable_trace_on_warning),
159 .mode = 0644,
160 .proc_handler = proc_dointvec,
161 },
162 {
163 .procname = "tracepoint_printk",
164 .data = &tracepoint_printk,
165 .maxlen = sizeof(tracepoint_printk),
166 .mode = 0644,
167 .proc_handler = tracepoint_printk_sysctl,
168 },
169 };
170
init_trace_sysctls(void)171 static int __init init_trace_sysctls(void)
172 {
173 register_sysctl_init("kernel", trace_sysctl_table);
174 return 0;
175 }
176 subsys_initcall(init_trace_sysctls);
177
178 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
179 /* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module		*mod;	/* owning module, NULL if built in */
	unsigned long		length;	/* number of saved maps in this array */
};
184
185 union trace_eval_map_item;
186
struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};
195
196 static DEFINE_MUTEX(trace_eval_mutex);
197
198 /*
199 * The trace_eval_maps are saved in an array with two extra elements,
200 * one at the beginning, and one at the end. The beginning item contains
201 * the count of the saved maps (head.length), and the module they
202 * belong to if not built in (head.mod). The ending item contains a
203 * pointer to the next array of saved eval_map items.
204 */
205 union trace_eval_map_item {
206 struct trace_eval_map map;
207 struct trace_eval_map_head head;
208 struct trace_eval_map_tail tail;
209 };
210
211 static union trace_eval_map_item *trace_eval_maps;
212 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
213
214 int tracing_set_tracer(struct trace_array *tr, const char *buf);
215 static void ftrace_trace_userstack(struct trace_array *tr,
216 struct trace_buffer *buffer,
217 unsigned int trace_ctx);
218
219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
220 static char *default_bootup_tracer;
221
222 static bool allocate_snapshot;
223 static bool snapshot_at_boot;
224
225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
226 static int boot_instance_index;
227
228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
229 static int boot_snapshot_index;
230
set_cmdline_ftrace(char * str)231 static int __init set_cmdline_ftrace(char *str)
232 {
233 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
234 default_bootup_tracer = bootup_tracer_buf;
235 /* We are using ftrace early, expand it */
236 trace_set_ring_buffer_expanded(NULL);
237 return 1;
238 }
239 __setup("ftrace=", set_cmdline_ftrace);
240
ftrace_dump_on_oops_enabled(void)241 int ftrace_dump_on_oops_enabled(void)
242 {
243 if (!strcmp("0", ftrace_dump_on_oops))
244 return 0;
245 else
246 return 1;
247 }
248
set_ftrace_dump_on_oops(char * str)249 static int __init set_ftrace_dump_on_oops(char *str)
250 {
251 if (!*str) {
252 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
253 return 1;
254 }
255
256 if (*str == ',') {
257 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
258 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
259 return 1;
260 }
261
262 if (*str++ == '=') {
263 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
264 return 1;
265 }
266
267 return 0;
268 }
269 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
270
stop_trace_on_warning(char * str)271 static int __init stop_trace_on_warning(char *str)
272 {
273 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
274 __disable_trace_on_warning = 1;
275 return 1;
276 }
277 __setup("traceoff_on_warning", stop_trace_on_warning);
278
boot_alloc_snapshot(char * str)279 static int __init boot_alloc_snapshot(char *str)
280 {
281 char *slot = boot_snapshot_info + boot_snapshot_index;
282 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
283 int ret;
284
285 if (str[0] == '=') {
286 str++;
287 if (strlen(str) >= left)
288 return -1;
289
290 ret = snprintf(slot, left, "%s\t", str);
291 boot_snapshot_index += ret;
292 } else {
293 allocate_snapshot = true;
294 /* We also need the main ring buffer expanded */
295 trace_set_ring_buffer_expanded(NULL);
296 }
297 return 1;
298 }
299 __setup("alloc_snapshot", boot_alloc_snapshot);
300
301
boot_snapshot(char * str)302 static int __init boot_snapshot(char *str)
303 {
304 snapshot_at_boot = true;
305 boot_alloc_snapshot(str);
306 return 1;
307 }
308 __setup("ftrace_boot_snapshot", boot_snapshot);
309
310
boot_instance(char * str)311 static int __init boot_instance(char *str)
312 {
313 char *slot = boot_instance_info + boot_instance_index;
314 int left = sizeof(boot_instance_info) - boot_instance_index;
315 int ret;
316
317 if (strlen(str) >= left)
318 return -1;
319
320 ret = snprintf(slot, left, "%s\t", str);
321 boot_instance_index += ret;
322
323 return 1;
324 }
325 __setup("trace_instance=", boot_instance);
326
327
328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
329
set_trace_boot_options(char * str)330 static int __init set_trace_boot_options(char *str)
331 {
332 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
333 return 1;
334 }
335 __setup("trace_options=", set_trace_boot_options);
336
337 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
338 static char *trace_boot_clock __initdata;
339
set_trace_boot_clock(char * str)340 static int __init set_trace_boot_clock(char *str)
341 {
342 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
343 trace_boot_clock = trace_boot_clock_buf;
344 return 1;
345 }
346 __setup("trace_clock=", set_trace_boot_clock);
347
set_tracepoint_printk(char * str)348 static int __init set_tracepoint_printk(char *str)
349 {
350 /* Ignore the "tp_printk_stop_on_boot" param */
351 if (*str == '_')
352 return 0;
353
354 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
355 tracepoint_printk = 1;
356 return 1;
357 }
358 __setup("tp_printk", set_tracepoint_printk);
359
set_tracepoint_printk_stop(char * str)360 static int __init set_tracepoint_printk_stop(char *str)
361 {
362 tracepoint_printk_stop_on_boot = true;
363 return 1;
364 }
365 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
366
set_traceoff_after_boot(char * str)367 static int __init set_traceoff_after_boot(char *str)
368 {
369 traceoff_after_boot = true;
370 return 1;
371 }
372 __setup("traceoff_after_boot", set_traceoff_after_boot);
373
/* Convert nanoseconds to microseconds, rounding to nearest. */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	/* do_div() is required for 64-bit division on 32-bit kernels. */
	do_div(nsec, 1000);
	return nsec;
}
380
381 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)382 trace_process_export(struct trace_export *export,
383 struct ring_buffer_event *event, int flag)
384 {
385 struct trace_entry *entry;
386 unsigned int size = 0;
387
388 if (export->flags & flag) {
389 entry = ring_buffer_event_data(event);
390 size = ring_buffer_event_length(event);
391 export->write(export, entry, size);
392 }
393 }
394
395 static DEFINE_MUTEX(ftrace_export_lock);
396
397 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
398
399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
402
ftrace_exports_enable(struct trace_export * export)403 static inline void ftrace_exports_enable(struct trace_export *export)
404 {
405 if (export->flags & TRACE_EXPORT_FUNCTION)
406 static_branch_inc(&trace_function_exports_enabled);
407
408 if (export->flags & TRACE_EXPORT_EVENT)
409 static_branch_inc(&trace_event_exports_enabled);
410
411 if (export->flags & TRACE_EXPORT_MARKER)
412 static_branch_inc(&trace_marker_exports_enabled);
413 }
414
ftrace_exports_disable(struct trace_export * export)415 static inline void ftrace_exports_disable(struct trace_export *export)
416 {
417 if (export->flags & TRACE_EXPORT_FUNCTION)
418 static_branch_dec(&trace_function_exports_enabled);
419
420 if (export->flags & TRACE_EXPORT_EVENT)
421 static_branch_dec(&trace_event_exports_enabled);
422
423 if (export->flags & TRACE_EXPORT_MARKER)
424 static_branch_dec(&trace_marker_exports_enabled);
425 }
426
ftrace_exports(struct ring_buffer_event * event,int flag)427 static void ftrace_exports(struct ring_buffer_event *event, int flag)
428 {
429 struct trace_export *export;
430
431 guard(preempt_notrace)();
432
433 export = rcu_dereference_raw_check(ftrace_exports_list);
434 while (export) {
435 trace_process_export(export, event, flag);
436 export = rcu_dereference_raw_check(export->next);
437 }
438 }
439
440 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)441 add_trace_export(struct trace_export **list, struct trace_export *export)
442 {
443 rcu_assign_pointer(export->next, *list);
444 /*
445 * We are entering export into the list but another
446 * CPU might be walking that list. We need to make sure
447 * the export->next pointer is valid before another CPU sees
448 * the export pointer included into the list.
449 */
450 rcu_assign_pointer(*list, export);
451 }
452
453 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)454 rm_trace_export(struct trace_export **list, struct trace_export *export)
455 {
456 struct trace_export **p;
457
458 for (p = list; *p != NULL; p = &(*p)->next)
459 if (*p == export)
460 break;
461
462 if (*p != export)
463 return -1;
464
465 rcu_assign_pointer(*p, (*p)->next);
466
467 return 0;
468 }
469
/* Enable the export's static keys, then publish it on @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
477
/* Unlink @export from @list and drop its static key counts. */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret = rm_trace_export(list, export);

	ftrace_exports_disable(export);

	return ret;
}
488
register_ftrace_export(struct trace_export * export)489 int register_ftrace_export(struct trace_export *export)
490 {
491 if (WARN_ON_ONCE(!export->write))
492 return -1;
493
494 guard(mutex)(&ftrace_export_lock);
495
496 add_ftrace_export(&ftrace_exports_list, export);
497
498 return 0;
499 }
500 EXPORT_SYMBOL_GPL(register_ftrace_export);
501
unregister_ftrace_export(struct trace_export * export)502 int unregister_ftrace_export(struct trace_export *export)
503 {
504 guard(mutex)(&ftrace_export_lock);
505 return rm_ftrace_export(&ftrace_exports_list, export);
506 }
507 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
508
509 /* trace_flags holds trace_options default values */
510 #define TRACE_DEFAULT_FLAGS \
511 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \
512 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \
513 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \
514 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \
515 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \
516 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \
517 TRACE_ITER(COPY_MARKER))
518
519 /* trace_options that are only supported by global_trace */
520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \
521 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \
522 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
523
524 /* trace_flags that are default zero for instances */
525 #define ZEROED_TRACE_FLAGS \
526 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
527 TRACE_ITER(COPY_MARKER))
528
529 /*
530 * The global_trace is the descriptor that holds the top-level tracing
531 * buffers for the live tracing.
532 */
533 static struct trace_array global_trace = {
534 .trace_flags = TRACE_DEFAULT_FLAGS,
535 };
536
537 struct trace_array *printk_trace = &global_trace;
538
539 /* List of trace_arrays interested in the top level trace_marker */
540 static LIST_HEAD(marker_copies);
541
update_printk_trace(struct trace_array * tr)542 static void update_printk_trace(struct trace_array *tr)
543 {
544 if (printk_trace == tr)
545 return;
546
547 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
548 printk_trace = tr;
549 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
550 }
551
552 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)553 static bool update_marker_trace(struct trace_array *tr, int enabled)
554 {
555 lockdep_assert_held(&event_mutex);
556
557 if (enabled) {
558 if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
559 return false;
560
561 list_add_rcu(&tr->marker_list, &marker_copies);
562 tr->trace_flags |= TRACE_ITER(COPY_MARKER);
563 return true;
564 }
565
566 if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
567 return false;
568
569 list_del_rcu(&tr->marker_list);
570 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
571 return true;
572 }
573
trace_set_ring_buffer_expanded(struct trace_array * tr)574 void trace_set_ring_buffer_expanded(struct trace_array *tr)
575 {
576 if (!tr)
577 tr = &global_trace;
578 tr->ring_buffer_expanded = true;
579 }
580
581 LIST_HEAD(ftrace_trace_arrays);
582
trace_array_get(struct trace_array * this_tr)583 int trace_array_get(struct trace_array *this_tr)
584 {
585 struct trace_array *tr;
586
587 guard(mutex)(&trace_types_lock);
588 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
589 if (tr == this_tr) {
590 tr->ref++;
591 return 0;
592 }
593 }
594
595 return -ENODEV;
596 }
597
__trace_array_put(struct trace_array * this_tr)598 static void __trace_array_put(struct trace_array *this_tr)
599 {
600 WARN_ON(!this_tr->ref);
601 this_tr->ref--;
602 }
603
604 /**
605 * trace_array_put - Decrement the reference counter for this trace array.
606 * @this_tr : pointer to the trace array
607 *
608 * NOTE: Use this when we no longer need the trace array returned by
609 * trace_array_get_by_name(). This ensures the trace array can be later
610 * destroyed.
611 *
612 */
trace_array_put(struct trace_array * this_tr)613 void trace_array_put(struct trace_array *this_tr)
614 {
615 if (!this_tr)
616 return;
617
618 guard(mutex)(&trace_types_lock);
619 __trace_array_put(this_tr);
620 }
621 EXPORT_SYMBOL_GPL(trace_array_put);
622
tracing_check_open_get_tr(struct trace_array * tr)623 int tracing_check_open_get_tr(struct trace_array *tr)
624 {
625 int ret;
626
627 ret = security_locked_down(LOCKDOWN_TRACEFS);
628 if (ret)
629 return ret;
630
631 if (tracing_disabled)
632 return -ENODEV;
633
634 if (tr && trace_array_get(tr) < 0)
635 return -ENODEV;
636
637 return 0;
638 }
639
buffer_ftrace_now(struct array_buffer * buf,int cpu)640 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
641 {
642 u64 ts;
643
644 /* Early boot up does not have a buffer yet */
645 if (!buf->buffer)
646 return trace_clock_local();
647
648 ts = ring_buffer_time_stamp(buf->buffer);
649 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
650
651 return ts;
652 }
653
ftrace_now(int cpu)654 u64 ftrace_now(int cpu)
655 {
656 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
657 }
658
659 /**
660 * tracing_is_enabled - Show if global_trace has been enabled
661 *
662 * Shows if the global trace has been enabled or not. It uses the
663 * mirror flag "buffer_disabled" to be used in fast paths such as for
664 * the irqsoff tracer. But it may be inaccurate due to races. If you
665 * need to know the accurate state, use tracing_is_on() which is a little
666 * slower, but accurate.
667 */
tracing_is_enabled(void)668 int tracing_is_enabled(void)
669 {
670 /*
671 * For quick access (irqsoff uses this in fast path), just
672 * return the mirror variable of the state of the ring buffer.
673 * It's a little racy, but we don't really care.
674 */
675 return !global_trace.buffer_disabled;
676 }
677
678 /*
679 * trace_buf_size is the size in bytes that is allocated
680 * for a buffer. Note, the number of bytes is always rounded
681 * to page size.
682 *
683 * This number is purposely set to a low number of 16384.
684 * If the dump on oops happens, it will be much appreciated
685 * to not have to wait for all that output. Anyway this can be
686 * boot time and run time configurable.
687 */
688 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
689
690 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
691
692 /* trace_types holds a link list of available tracers. */
693 static struct tracer *trace_types __read_mostly;
694
695 /*
696 * trace_types_lock is used to protect the trace_types list.
697 */
698 DEFINE_MUTEX(trace_types_lock);
699
700 /*
701 * serialize the access of the ring buffer
702 *
703 * ring buffer serializes readers, but it is low level protection.
704 * The validity of the events (which returns by ring_buffer_peek() ..etc)
705 * are not protected by ring buffer.
706 *
707 * The content of events may become garbage if we allow other process consumes
708 * these events concurrently:
709 * A) the page of the consumed events may become a normal page
710 * (not reader page) in ring buffer, and this page will be rewritten
711 * by events producer.
712 * B) The page of the consumed events may become a page for splice_read,
713 * and this page will be returned to system.
714 *
715 * These primitives allow multi process access to different cpu ring buffer
716 * concurrently.
717 *
718 * These primitives don't distinguish read-only and read-consume access.
719 * Multi read-only access are also serialized.
720 */
721
722 #ifdef CONFIG_SMP
723 static DECLARE_RWSEM(all_cpu_access_lock);
724 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
725
trace_access_lock(int cpu)726 static inline void trace_access_lock(int cpu)
727 {
728 if (cpu == RING_BUFFER_ALL_CPUS) {
729 /* gain it for accessing the whole ring buffer. */
730 down_write(&all_cpu_access_lock);
731 } else {
732 /* gain it for accessing a cpu ring buffer. */
733
734 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
735 down_read(&all_cpu_access_lock);
736
737 /* Secondly block other access to this @cpu ring buffer. */
738 mutex_lock(&per_cpu(cpu_access_lock, cpu));
739 }
740 }
741
trace_access_unlock(int cpu)742 static inline void trace_access_unlock(int cpu)
743 {
744 if (cpu == RING_BUFFER_ALL_CPUS) {
745 up_write(&all_cpu_access_lock);
746 } else {
747 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
748 up_read(&all_cpu_access_lock);
749 }
750 }
751
trace_access_lock_init(void)752 static inline void trace_access_lock_init(void)
753 {
754 int cpu;
755
756 for_each_possible_cpu(cpu)
757 mutex_init(&per_cpu(cpu_access_lock, cpu));
758 }
759
760 #else
761
762 static DEFINE_MUTEX(access_lock);
763
trace_access_lock(int cpu)764 static inline void trace_access_lock(int cpu)
765 {
766 (void)cpu;
767 mutex_lock(&access_lock);
768 }
769
trace_access_unlock(int cpu)770 static inline void trace_access_unlock(int cpu)
771 {
772 (void)cpu;
773 mutex_unlock(&access_lock);
774 }
775
/* UP: nothing to initialize. */
static inline void trace_access_lock_init(void)
{
}
779
780 #endif
781
tracer_tracing_on(struct trace_array * tr)782 void tracer_tracing_on(struct trace_array *tr)
783 {
784 if (tr->array_buffer.buffer)
785 ring_buffer_record_on(tr->array_buffer.buffer);
786 /*
787 * This flag is looked at when buffers haven't been allocated
788 * yet, or by some tracers (like irqsoff), that just want to
789 * know if the ring buffer has been disabled, but it can handle
790 * races of where it gets disabled but we still do a record.
791 * As the check is in the fast path of the tracers, it is more
792 * important to be fast than accurate.
793 */
794 tr->buffer_disabled = 0;
795 }
796
797 /**
798 * tracing_on - enable tracing buffers
799 *
800 * This function enables tracing buffers that may have been
801 * disabled with tracing_off.
802 */
tracing_on(void)803 void tracing_on(void)
804 {
805 tracer_tracing_on(&global_trace);
806 }
807 EXPORT_SYMBOL_GPL(tracing_on);
808
809 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)810 static void tracing_snapshot_instance_cond(struct trace_array *tr,
811 void *cond_data)
812 {
813 unsigned long flags;
814
815 if (in_nmi()) {
816 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
817 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
818 return;
819 }
820
821 if (!tr->allocated_snapshot) {
822 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
823 trace_array_puts(tr, "*** stopping trace here! ***\n");
824 tracer_tracing_off(tr);
825 return;
826 }
827
828 if (tr->mapped) {
829 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
830 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
831 return;
832 }
833
834 /* Note, snapshot can not be used when the tracer uses it */
835 if (tracer_uses_snapshot(tr->current_trace)) {
836 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
837 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
838 return;
839 }
840
841 local_irq_save(flags);
842 update_max_tr(tr, current, smp_processor_id(), cond_data);
843 local_irq_restore(flags);
844 }
845
tracing_snapshot_instance(struct trace_array * tr)846 void tracing_snapshot_instance(struct trace_array *tr)
847 {
848 tracing_snapshot_instance_cond(tr, NULL);
849 }
850
851 /**
852 * tracing_snapshot - take a snapshot of the current buffer.
853 *
854 * This causes a swap between the snapshot buffer and the current live
855 * tracing buffer. You can use this to take snapshots of the live
856 * trace when some condition is triggered, but continue to trace.
857 *
858 * Note, make sure to allocate the snapshot with either
859 * a tracing_snapshot_alloc(), or by doing it manually
860 * with: echo 1 > /sys/kernel/tracing/snapshot
861 *
862 * If the snapshot buffer is not allocated, it will stop tracing.
863 * Basically making a permanent snapshot.
864 */
tracing_snapshot(void)865 void tracing_snapshot(void)
866 {
867 struct trace_array *tr = &global_trace;
868
869 tracing_snapshot_instance(tr);
870 }
871 EXPORT_SYMBOL_GPL(tracing_snapshot);
872
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
890 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
891
892 /**
893 * tracing_cond_snapshot_data - get the user data associated with a snapshot
894 * @tr: The tracing instance
895 *
896 * When the user enables a conditional snapshot using
897 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
898 * with the snapshot. This accessor is used to retrieve it.
899 *
900 * Should not be called from cond_snapshot.update(), since it takes
901 * the tr->max_lock lock, which the code calling
902 * cond_snapshot.update() has already done.
903 *
904 * Returns the cond_data associated with the trace array's snapshot.
905 */
tracing_cond_snapshot_data(struct trace_array * tr)906 void *tracing_cond_snapshot_data(struct trace_array *tr)
907 {
908 void *cond_data = NULL;
909
910 local_irq_disable();
911 arch_spin_lock(&tr->max_lock);
912
913 if (tr->cond_snapshot)
914 cond_data = tr->cond_snapshot->cond_data;
915
916 arch_spin_unlock(&tr->max_lock);
917 local_irq_enable();
918
919 return cond_data;
920 }
921 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
922
923 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
924 struct array_buffer *size_buf, int cpu_id);
925 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
926
tracing_alloc_snapshot_instance(struct trace_array * tr)927 int tracing_alloc_snapshot_instance(struct trace_array *tr)
928 {
929 int order;
930 int ret;
931
932 if (!tr->allocated_snapshot) {
933
934 /* Make the snapshot buffer have the same order as main buffer */
935 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
936 ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
937 if (ret < 0)
938 return ret;
939
940 /* allocate spare buffer */
941 ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
942 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
943 if (ret < 0)
944 return ret;
945
946 tr->allocated_snapshot = true;
947 }
948
949 return 0;
950 }
951
free_snapshot(struct trace_array * tr)952 static void free_snapshot(struct trace_array *tr)
953 {
954 /*
955 * We don't free the ring buffer. instead, resize it because
956 * The max_tr ring buffer has some state (e.g. ring->clock) and
957 * we want preserve it.
958 */
959 ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
960 ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
961 set_buffer_entries(&tr->snapshot_buffer, 1);
962 tracing_reset_online_cpus(&tr->snapshot_buffer);
963 tr->allocated_snapshot = false;
964 }
965
tracing_arm_snapshot_locked(struct trace_array * tr)966 static int tracing_arm_snapshot_locked(struct trace_array *tr)
967 {
968 int ret;
969
970 lockdep_assert_held(&trace_types_lock);
971
972 spin_lock(&tr->snapshot_trigger_lock);
973 if (tr->snapshot == UINT_MAX || tr->mapped) {
974 spin_unlock(&tr->snapshot_trigger_lock);
975 return -EBUSY;
976 }
977
978 tr->snapshot++;
979 spin_unlock(&tr->snapshot_trigger_lock);
980
981 ret = tracing_alloc_snapshot_instance(tr);
982 if (ret) {
983 spin_lock(&tr->snapshot_trigger_lock);
984 tr->snapshot--;
985 spin_unlock(&tr->snapshot_trigger_lock);
986 }
987
988 return ret;
989 }
990
tracing_arm_snapshot(struct trace_array * tr)991 int tracing_arm_snapshot(struct trace_array *tr)
992 {
993 guard(mutex)(&trace_types_lock);
994 return tracing_arm_snapshot_locked(tr);
995 }
996
tracing_disarm_snapshot(struct trace_array * tr)997 void tracing_disarm_snapshot(struct trace_array *tr)
998 {
999 spin_lock(&tr->snapshot_trigger_lock);
1000 if (!WARN_ON(!tr->snapshot))
1001 tr->snapshot--;
1002 spin_unlock(&tr->snapshot_trigger_lock);
1003 }
1004
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 *
 * Returns 0 on success, or a negative errno on failure (a failure is
 * unexpected here and triggers a WARN).
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1026
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 * If the allocation fails, the snapshot is silently skipped.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr: The tracing instance
 * @cond_data: User data to associate with the snapshot
 * @update: Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	/* __free(kfree) releases this automatically on every early return */
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc_obj(*cond_snapshot);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	/* A tracer that swaps the snapshot buffer itself can't share it */
	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	/* Publish under max_lock so snapshot swaps see a stable pointer */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1105
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	/* max_lock keeps snapshot swaps from seeing a half-freed pointer */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	/*
	 * Drop the reference taken by tracing_snapshot_cond_enable();
	 * tracing_disarm_snapshot() warns if the counter is already zero.
	 */
	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1138 #else
/* Stub used when CONFIG_TRACER_SNAPSHOT is not enabled: warn once and do nothing. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub: conditional snapshots are unavailable without CONFIG_TRACER_SNAPSHOT. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/* Stub: the snapshot buffer cannot be allocated without CONFIG_TRACER_SNAPSHOT. */
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub: delegate to tracing_snapshot() solely for its one-time warning. */
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
/* Stub: no conditional snapshot, hence no associated user data. */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
/* Stub: enabling a conditional snapshot is not supported in this config. */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/*
 * Stub: with CONFIG_TRACER_SNAPSHOT disabled there is never a conditional
 * snapshot to tear down, so report success (nothing to do).
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	/* Was "return false;" — same value, but 0 matches the int return type */
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1178 #endif /* CONFIG_TRACER_SNAPSHOT */
1179
tracer_tracing_off(struct trace_array * tr)1180 void tracer_tracing_off(struct trace_array *tr)
1181 {
1182 if (tr->array_buffer.buffer)
1183 ring_buffer_record_off(tr->array_buffer.buffer);
1184 /*
1185 * This flag is looked at when buffers haven't been allocated
1186 * yet, or by some tracers (like irqsoff), that just want to
1187 * know if the ring buffer has been disabled, but it can handle
1188 * races of where it gets disabled but we still do a record.
1189 * As the check is in the fast path of the tracers, it is more
1190 * important to be fast than accurate.
1191 */
1192 tr->buffer_disabled = 1;
1193 }
1194
/**
 * tracer_tracing_disable() - temporary disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas, tracer_tracing_off() can
 * be called multiple times and a single trace_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}
1212
/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}
1227
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
1241
/*
 * Stop tracing when a kernel warning fires, if __disable_trace_on_warning
 * is set.  A marker line is written to the affected buffers first so the
 * dump shows why recording stopped.
 */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too, if it is a separate instance */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}
1259
1260 /**
1261 * tracer_tracing_is_on - show real state of ring buffer enabled
1262 * @tr : the trace array to know if ring buffer is enabled
1263 *
1264 * Shows real state of the ring buffer if it is enabled or not.
1265 */
tracer_tracing_is_on(struct trace_array * tr)1266 bool tracer_tracing_is_on(struct trace_array *tr)
1267 {
1268 if (tr->array_buffer.buffer)
1269 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1270 return !tr->buffer_disabled;
1271 }
1272
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns nonzero if the global trace buffer is recording, 0 otherwise.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1281
/*
 * Parse the "trace_buf_size=" boot parameter.  memparse() accepts
 * K/M/G suffixes; the result sets the requested ring buffer size.
 */
static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);
1298
/*
 * Parse the "tracing_thresh=" boot parameter.  The value is given in
 * microseconds and stored in tracing_thresh as nanoseconds.
 */
static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	/* usecs -> nsecs */
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);
1313
/* Convert a nanosecond count to whole microseconds (truncating division). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	unsigned long usecs = nsecs / 1000;

	return usecs;
}
1318
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	/* NULL-terminated so iteration can stop without knowing the count */
	NULL
};
1333
/*
 * Table of clocks selectable through the trace_clock file.  Each entry
 * provides the clock callback, its user-visible name, and whether the
 * clock counts in nanoseconds (needed for timestamp formatting).
 */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};
1350
trace_clock_in_ns(struct trace_array * tr)1351 bool trace_clock_in_ns(struct trace_array *tr)
1352 {
1353 if (trace_clocks[tr->clock_id].in_ns)
1354 return true;
1355
1356 return false;
1357 }
1358
/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Zeroes the parser state and allocates @size bytes for its buffer.
 * Returns 0 on success, 1 on allocation failure (callers only test
 * for nonzero).
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}
1373
/*
 * trace_parser_put - frees the buffer for trace parser
 *
 * NULLing the pointer guards against double free / use after free.
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}
1382
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A fresh read (offset 0) starts a new token */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			/* token longer than the parser buffer */
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* token continues in the next write */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}
1470
1471 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1472 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1473 {
1474 int len;
1475
1476 if (trace_seq_used(s) <= s->readpos)
1477 return -EBUSY;
1478
1479 len = trace_seq_used(s) - s->readpos;
1480 if (cnt > len)
1481 cnt = len;
1482 memcpy(buf, s->buffer + s->readpos, cnt);
1483
1484 s->readpos += cnt;
1485 return cnt;
1486 }
1487
1488 unsigned long __read_mostly tracing_thresh;
1489
1490 #ifdef CONFIG_TRACER_MAX_TRACE
1491 #ifdef LATENCY_FS_NOTIFY
/* Workqueue used to deliver fsnotify events for tracing_max_latency updates */
static struct workqueue_struct *fsnotify_wq;

/* Workqueue callback: emit FS_MODIFY on the tracing_max_latency dentry. */
static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}
1500
/* irq_work callback: bounce the notification to process context. */
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}
1507
/* Allocate the workqueue used by latency_fsnotify(); runs late in boot. */
__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);
1520
/*
 * Notify watchers that tracing_max_latency changed.  Safe to call from
 * any context; silently does nothing before the workqueue exists.
 */
void latency_fsnotify(struct trace_array *tr)
{
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
1532 #endif /* !LATENCY_FS_NOTIFY */
1533
static const struct file_operations tracing_max_lat_fops;

/* Create the tracing_max_latency file and wire up its change notification. */
static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
#ifdef LATENCY_FS_NOTIFY
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
#endif
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}
1548
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 *
 * Called with tr->max_lock held (see callers): records the latency,
 * critical section bounds, and identity of @tsk into the snapshot
 * buffer's per-cpu data.
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct array_buffer *max_buf = &tr->snapshot_buffer;
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	strscpy(max_data->comm, tsk->comm);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
	latency_fsnotify(tr);
}
1588 #else
/* Stubs used when CONFIG_TRACER_MAX_TRACE is not enabled. */
static inline void trace_create_maxlat_file(struct trace_array *tr,
					    struct dentry *d_tracer) { }
static inline void __update_max_tr(struct trace_array *tr,
				   struct task_struct *tsk, int cpu) { }
1593 #endif /* CONFIG_TRACER_MAX_TRACE */
1594
1595 #ifdef CONFIG_TRACER_SNAPSHOT
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->snapshot_buffer.buffer);
	else
		ring_buffer_record_off(tr->snapshot_buffer.buffer);

	/* The conditional-snapshot hook may veto this snapshot */
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}

	/* Swap the live buffer with the snapshot buffer */
	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);

	/* Any waiters on the old snapshot buffer need to wake up */
	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}
1643
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 * Another reason is resize is in progress.
		 */
		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
				       "Failed to swap buffers due to commit or resize in progress\n");
	}

	/* -EAGAIN and -EBUSY are expected transient failures */
	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
1688 #endif /* CONFIG_TRACER_SNAPSHOT */
1689
/*
 * Per-waiter state for trace pipe reads: the iterator being waited on
 * and the wait_index value observed when the wait started.
 */
struct pipe_wait {
	struct trace_iterator	*iter;
	int			wait_index;
};
1694
wait_pipe_cond(void * data)1695 static bool wait_pipe_cond(void *data)
1696 {
1697 struct pipe_wait *pwait = data;
1698 struct trace_iterator *iter = pwait->iter;
1699
1700 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1701 return true;
1702
1703 return iter->closed;
1704 }
1705
/*
 * Block until the pipe's buffer is at least @full percent full, the
 * wait_index changes, or the iterator is closed.  Returns whatever
 * ring_buffer_wait() returns (0 on wake-up, negative errno otherwise).
 */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1731
1732 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set once the boot has progressed far enough to run tracer selftests */
static bool selftests_can_run;

/* A tracer whose selftest was postponed until selftests_can_run is set */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

static LIST_HEAD(postponed_selftests);
1741
save_selftest(struct tracer * type)1742 static int save_selftest(struct tracer *type)
1743 {
1744 struct trace_selftests *selftest;
1745
1746 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1747 if (!selftest)
1748 return -ENOMEM;
1749
1750 selftest->type = type;
1751 list_add(&selftest->list, &postponed_selftests);
1752 return 0;
1753 }
1754
/*
 * Run @type's selftest against the global trace array, temporarily
 * installing it as the current tracer.  Returns 0 on pass/skip, a
 * negative value (after saving or on failure) otherwise.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1830
/*
 * Wrapper around run_tracer_selftest() that flags the selftest as
 * running and yields the CPU beforehand.
 */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1848
/*
 * Run all selftests that were postponed during early boot, removing any
 * tracer that fails from the available_tracers list.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink the failed tracer from the singly linked list */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
1892 #else
/* Stub used when CONFIG_FTRACE_STARTUP_TEST is not enabled. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
1897 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1898
1899 static int add_tracer(struct trace_array *tr, struct tracer *t);
1900
1901 static void __init apply_trace_boot_options(void);
1902
/* Free every tracers entry attached to @tr.  Caller holds trace_types_lock. */
static void free_tracers(struct trace_array *tr)
{
	struct tracers *t, *n;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry_safe(t, n, &tr->tracers, list) {
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}
}
1915
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.  Validates the name, runs the tracer's
 * selftest, adds it to every existing trace instance, and links it
 * into the global trace_types list.  If this tracer was requested on
 * the boot command line, it is started immediately.
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	/* Reject duplicate names */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	/* Link into the global singly linked list of tracers */
	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

	return 0;
}
1997
/* Clear one CPU's ring buffer of @buf, quiescing writers first. */
static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}
2013
/* Clear the ring buffers of all online CPUs for @buf and restamp time_start. */
void tracing_reset_online_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset_online_cpus(buffer);

	ring_buffer_record_enable(buffer);
}
2032
/* Like tracing_reset_online_cpus() but clears every CPU, online or not. */
static void tracing_reset_all_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset(buffer);

	ring_buffer_record_enable(buffer);
}
2051
/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus_unlocked(void)
{
	struct trace_array *tr;

	lockdep_assert_held(&trace_types_lock);

	/*
	 * Only reset trace arrays whose buffers were flagged for a
	 * deferred clear (tr->clear_trace); consume the flag as we go.
	 */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->clear_trace)
			continue;
		tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_SNAPSHOT
		tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	}
}
2069
tracing_reset_all_online_cpus(void)2070 void tracing_reset_all_online_cpus(void)
2071 {
2072 guard(mutex)(&trace_types_lock);
2073 tracing_reset_all_online_cpus_unlocked();
2074 }
2075
is_tracing_stopped(void)2076 int is_tracing_stopped(void)
2077 {
2078 return global_trace.stop_count;
2079 }
2080
/*
 * tracing_start_tr - re-enable recording on @tr's buffers
 *
 * Decrements tr->stop_count and, when it reaches zero, re-enables
 * recording on the main (and snapshot) ring buffers. Pairs with
 * tracing_stop_tr(). tr->max_lock is taken to prevent the buffers
 * from being swapped while they are re-enabled.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (--tr->stop_count) {
		/* An unbalanced start (count went negative) is clamped back */
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
2112
2113 /**
2114 * tracing_start - quick start of the tracer
2115 *
2116 * If tracing is enabled but was stopped by tracing_stop,
2117 * this will start the tracer back up.
2118 */
tracing_start(void)2119 void tracing_start(void)
2120
2121 {
2122 return tracing_start_tr(&global_trace);
2123 }
2124
/*
 * tracing_stop_tr - disable recording on @tr's buffers
 *
 * Increments tr->stop_count; only the first stop actually disables the
 * buffers. tr->max_lock keeps the buffers from being swapped while they
 * are being disabled. Pairs with tracing_start_tr().
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
2148
2149 /**
2150 * tracing_stop - quick stop of the tracer
2151 *
2152 * Light weight way to stop tracing. Use in conjunction with
2153 * tracing_start.
2154 */
tracing_stop(void)2155 void tracing_stop(void)
2156 {
2157 return tracing_stop_tr(&global_trace);
2158 }
2159
2160 /*
2161 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2162 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2163 * simplifies those functions and keeps them in sync.
2164 */
trace_handle_return(struct trace_seq * s)2165 enum print_line_t trace_handle_return(struct trace_seq *s)
2166 {
2167 return trace_seq_has_overflowed(s) ?
2168 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2169 }
2170 EXPORT_SYMBOL_GPL(trace_handle_return);
2171
/* Current task's migration-disable depth; 0 on !SMP builds */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}
2180
/*
 * tracing_gen_ctx_irq_test - build the packed trace context word
 * @irqs_status: initial TRACE_FLAG_* bits describing the irq state
 *
 * Layout of the returned value:
 *   bits  0-3 : preempt count (low byte, clamped to 15)
 *   bits  4-7 : migration-disable depth (clamped to 15)
 *   bits 16-31: TRACE_FLAG_* context bits
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;
	/* A softirq count beyond one unit indicates BH is disabled */
	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
		trace_flags |= TRACE_FLAG_BH_OFF;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
2206
/*
 * trace_buffer_lock_reserve - reserve space for an event on @buffer
 *
 * Thin exported wrapper around __trace_buffer_lock_reserve(): reserves
 * @len bytes for an event of @type with context @trace_ctx. Returns
 * NULL when the reservation fails.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2215
/* Per-CPU scratch event page used while filtering (see functions below) */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Nesting count: non-zero means this CPU's buffered event is in use */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of users of buffered events; protected by event_mutex */
static int trace_buffered_event_ref;
2219
/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Reference counted: only the first enabler allocates the pages */
	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		/* This is just an optimization and can handle failures */
		if (!page) {
			pr_err("Failed to allocate event buffer\n");
			break;
		}

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		/* Sanity check: this CPU's per-CPU view must match the slot */
		scoped_guard(preempt,) {
			if (cpu == smp_processor_id() &&
			    __this_cpu_read(trace_buffered_event) !=
			    per_cpu(trace_buffered_event, cpu))
				WARN_ON_ONCE(1);
		}
	}
}
2267
/* IPI callback: mark this CPU's buffered event as available again */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}
2272
/* IPI callback: mark this CPU's buffered event as in use (unavailable) */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2277
/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Reference counted: only the last disabler frees the pages */
	if (--trace_buffered_event_ref)
		return;

	/* For each CPU, set the buffer as used. */
	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
			 NULL, true);

	/* Wait for all current users to finish */
	synchronize_rcu();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}

	/*
	 * Wait for all CPUs that potentially started checking if they can use
	 * their event buffer only after the previous synchronize_rcu() call and
	 * they still read a valid pointer from trace_buffered_event. It must be
	 * ensured they don't see cleared trace_buffered_event_cnt else they
	 * could wrongly decide to use the pointed-to buffer which is now freed.
	 */
	synchronize_rcu();

	/* For each CPU, relinquish the buffer */
	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
			 true);
}
2323
/* Scratch buffer for triggers when the main ring buffer rejects a write */
static struct trace_buffer *temp_buffer;
2325
/**
 * trace_event_buffer_lock_reserve - reserve a slot for a trace event
 * @current_rb: output; set to the buffer actually used for the event
 * @trace_file: the event file (carries flags and the trace array)
 * @type: the event type id
 * @len: payload length in bytes
 * @trace_ctx: packed tracing context
 *
 * When filtering is active for the event, first try the per-CPU
 * buffered-event page, as dropping a filtered event from it is much
 * cheaper than discarding from the ring buffer. Otherwise reserve on
 * the real ring buffer, falling back to temp_buffer when triggers
 * still need to see the data.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
				struct trace_event_file *trace_file,
				int type, unsigned long len,
				unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2408
/* Serializes use of tracepoint_print_iter->seq in output_printk() */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Protects transitions of the tracepoint_printk value and static key */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2411
output_printk(struct trace_event_buffer * fbuffer)2412 static void output_printk(struct trace_event_buffer *fbuffer)
2413 {
2414 struct trace_event_call *event_call;
2415 struct trace_event_file *file;
2416 struct trace_event *event;
2417 unsigned long flags;
2418 struct trace_iterator *iter = tracepoint_print_iter;
2419
2420 /* We should never get here if iter is NULL */
2421 if (WARN_ON_ONCE(!iter))
2422 return;
2423
2424 event_call = fbuffer->trace_file->event_call;
2425 if (!event_call || !event_call->event.funcs ||
2426 !event_call->event.funcs->trace)
2427 return;
2428
2429 file = fbuffer->trace_file;
2430 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2431 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2432 !filter_match_preds(file->filter, fbuffer->entry)))
2433 return;
2434
2435 event = &fbuffer->trace_file->event_call->event;
2436
2437 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2438 trace_seq_init(&iter->seq);
2439 iter->ent = fbuffer->entry;
2440 event_call->event.funcs->trace(iter, 0, event);
2441 trace_seq_putc(&iter->seq, 0);
2442 printk("%s", iter->seq.buffer);
2443
2444 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2445 }
2446
/*
 * tracepoint_printk_sysctl - sysctl handler for kernel.tracepoint_printk
 *
 * Flips the tracepoint_printk static key only when the value actually
 * changes, so the printk path is patched in/out on demand.
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	guard(mutex)(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* No change: leave the static key alone */
	if (save_tracepoint_printk == tracepoint_printk)
		return ret;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

	return ret;
}
2476
/*
 * trace_event_buffer_commit - finalize and commit a reserved trace event
 *
 * Runs event triggers (which may discard the event), mirrors the event
 * to printk and/or registered exporters when those paths are enabled,
 * then commits it to the ring buffer. Post-commit triggers collected in
 * @tt run in either case.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
			fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2501
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * trace_buffer_unlock_commit_regs - commit @event and record stack traces
 *
 * Commits the reserved event, then hands off to ftrace_trace_stack()
 * and ftrace_trace_userstack() to record stack dumps for the same
 * context (subject to the trace options those helpers check).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2528
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 * Just commits the event to @buffer.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2538
/*
 * trace_function - record a TRACE_FN (function entry) event
 * @tr: the trace array to write to
 * @ip: address of the traced function
 * @parent_ip: address of the caller
 * @trace_ctx: packed context from tracing_gen_ctx()
 * @fregs: optional register state used to record function arguments
 *
 * When @fregs is set and the arch provides argument access,
 * FTRACE_REGS_MAX_ARGS argument words are appended to the entry.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* Reserve room for the argument array only when fregs is supplied */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}
2569
#ifdef CONFIG_STACKTRACE

/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of entries per nesting level */
#define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)

/* One saved kernel stack trace */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};

/* Per-CPU set of stack-trace slots, one per nesting level */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Index of the next free slot in ftrace_stacks on this CPU */
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2588
/*
 * __ftrace_trace_stack - record a kernel stack trace into @buffer
 * @tr:        trace array (used to detect trampoline addresses)
 * @buffer:    ring buffer to write the TRACE_STACK event to
 * @trace_ctx: packed tracing context
 * @skip:      number of top-of-stack callers to skip
 * @regs:      if set, take the stack trace from this register state
 *
 * Uses preallocated per-CPU stack storage with FTRACE_KSTACK_NESTING
 * slots so that normal, softirq, irq and NMI context can each save a
 * trace without stepping on one another.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	/* Guard against recursive stack tracing */
	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
					    struct_size(entry, caller, nr_entries),
					    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}
2674
/*
 * __trace_stack - record a kernel stack trace on @tr's main buffer
 *
 * Handles the case where RCU is not watching (e.g. during idle/entry
 * code): outside of NMI context, RCU is temporarily re-enabled via
 * ct_irq_enter_irqson() so the ring buffer write is safe.
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
		return;
	}

	/* With generic entry code, RCU should always be watching here */
	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
		return;

	/*
	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and ct_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	ct_irq_enter_irqson();
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
	ct_irq_exit_irqson();
}
2701
/**
 * trace_dump_stack - record a stack back trace in the trace buffer
 * @skip: Number of functions to skip (helper handlers)
 *
 * Writes the stack trace of the caller into the printk_trace buffer.
 * No-op while tracing is disabled or a selftest is running.
 */
void trace_dump_stack(int skip)
{
	if (tracing_disabled || tracing_selftest_running)
		return;

#ifndef CONFIG_UNWINDER_ORC
	/* Skip 1 to skip this function. */
	skip++;
#endif
	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
			     tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
2719
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU recursion guard for user stack tracing */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * ftrace_trace_userstack - record a user-space stack trace
 *
 * Writes a TRACE_USER_STACK event for the current task when the
 * USERSTACKTRACE option is set on @tr. Not usable from NMI context,
 * as saving the user stack may fault.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry = ring_buffer_event_data(event);

	entry->tgid = current->tgid;
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Stub used when user stack tracing is not configured */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2772
2773 #endif /* CONFIG_STACKTRACE */
2774
2775 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)2776 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2777 unsigned long long delta)
2778 {
2779 entry->bottom_delta_ts = delta & U32_MAX;
2780 entry->top_delta_ts = (delta >> 32);
2781 }
2782
/*
 * trace_last_func_repeats - emit a TRACE_FUNC_REPEATS event
 *
 * Records that the function in @last_info repeated @last_info->count
 * times, together with the time delta between this event's timestamp
 * and the last recorded call.
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}
2808
trace_iterator_increment(struct trace_iterator * iter)2809 static void trace_iterator_increment(struct trace_iterator *iter)
2810 {
2811 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2812
2813 iter->idx++;
2814 if (buf_iter)
2815 ring_buffer_iter_advance(buf_iter);
2816 }
2817
2818 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)2819 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2820 unsigned long *lost_events)
2821 {
2822 struct ring_buffer_event *event;
2823 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2824
2825 if (buf_iter) {
2826 event = ring_buffer_iter_peek(buf_iter, ts);
2827 if (lost_events)
2828 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
2829 (unsigned long)-1 : 0;
2830 } else {
2831 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
2832 lost_events);
2833 }
2834
2835 if (event) {
2836 iter->ent_size = ring_buffer_event_length(event);
2837 return ring_buffer_event_data(event);
2838 }
2839 iter->ent_size = 0;
2840 return NULL;
2841 }
2842
/*
 * __find_next_entry - find the next entry to display
 * @iter: the trace iterator
 * @ent_cpu: output, CPU of the returned entry (-1 when none found)
 * @missing_events: output, lost-event count for the returned entry
 * @ent_ts: output, timestamp of the returned entry
 *
 * For a per-CPU trace file, peeks that CPU directly. Otherwise scans
 * every tracing CPU and picks the entry with the smallest timestamp.
 * Does not consume the entry.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	/* peek_next_entry() clobbers ent_size; restore the winner's size */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
2902
2903 #define STATIC_FMT_BUF_SIZE 128
2904 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2905
trace_iter_expand_format(struct trace_iterator * iter)2906 char *trace_iter_expand_format(struct trace_iterator *iter)
2907 {
2908 char *tmp;
2909
2910 /*
2911 * iter->tr is NULL when used with tp_printk, which makes
2912 * this get called where it is not safe to call krealloc().
2913 */
2914 if (!iter->tr || iter->fmt == static_fmt_buf)
2915 return NULL;
2916
2917 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2918 GFP_KERNEL);
2919 if (tmp) {
2920 iter->fmt_size += STATIC_FMT_BUF_SIZE;
2921 iter->fmt = tmp;
2922 }
2923
2924 return tmp;
2925 }
2926
/*
 * Returns true if the string is safe to dereference from an event:
 * it lives in the event payload, the temp seq buffer, core rodata,
 * a registered tracepoint string, or the core area of the module
 * that defined the event.
 */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	/* Dynamic events have no module backing to check against */
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}
2972
/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe.
 *
 * @return: true if the event is unsafe and should be ignored,
 *          false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Only events flagged at boot need their string fields re-checked */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
3055
/*
 * trace_event_format - return a format string suitable for this iterator
 *
 * When the "hash-ptr" trace option is cleared, copy @fmt into iter->fmt,
 * rewriting each bare "%p" (not "%ps", "%pB", ... — the next character
 * must not be alphanumeric) into "%px" so real addresses are printed.
 * Returns @fmt unchanged when no rewrite is needed or the copy buffer
 * cannot be grown.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	/* With hash-ptr set (or no trace_array), keep the original format */
	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* Worst case this iteration appends "px" plus the final NUL */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* iter->fmt may have been reallocated; rebase q onto it */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				/* "%%" is a literal percent; copy it through */
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
3094
/*
 * Static fallback for iter->temp, used when kmalloc() is not an option
 * (e.g. when dumping from ftrace_dump(); see trace_find_next_entry()).
 */
#define STATIC_TEMP_BUF_SIZE 128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3097
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* Grow the kmalloc'd temp buffer when the entry won't fit */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
3143
3144 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3145 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3146 {
3147 iter->ent = __find_next_entry(iter, &iter->cpu,
3148 &iter->lost_events, &iter->ts);
3149
3150 if (iter->ent)
3151 trace_iterator_increment(iter);
3152
3153 return iter->ent ? iter : NULL;
3154 }
3155
/* Consume (remove) the current entry of iter->cpu from the ring buffer */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
3161
/* seq_file ->next: advance the trace iterator until it reaches index *pos */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	/* idx < 0 means no entry has been produced yet; fetch the first one */
	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);
	else
		ent = iter;

	/* Walk forward until the iterator catches up with the requested index */
	while (ent && iter->idx < i)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
3188
/*
 * Reset the per-cpu buffer iterator for @cpu and skip past any entries
 * stamped before the buffer's time_start, recording how many were skipped
 * in the per-cpu skipped_entries count.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
3219
/*
 * The current tracer is copied to avoid a global locking
 * all around.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/* If the current tracer changed since open, switch iter->trace over */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/* Position moved: restart from scratch and walk forward to *pos */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	/* Both are released in s_stop() */
	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
3280
/* seq_file ->stop: release the locks taken at the end of s_start() */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

	/* In this case s_start() bailed with -EBUSY before taking the locks */
	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return;

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
3291
3292 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)3293 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3294 unsigned long *entries, int cpu)
3295 {
3296 unsigned long count;
3297
3298 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3299 /*
3300 * If this buffer has skipped entries, then we hold all
3301 * entries for the trace and we need to ignore the
3302 * ones before the time stamp.
3303 */
3304 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3305 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3306 /* total is the same as the entries */
3307 *total = count;
3308 } else
3309 *total = count +
3310 ring_buffer_overrun_cpu(buf->buffer, cpu);
3311 *entries = count;
3312 }
3313
3314 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)3315 get_total_entries(struct array_buffer *buf,
3316 unsigned long *total, unsigned long *entries)
3317 {
3318 unsigned long t, e;
3319 int cpu;
3320
3321 *total = 0;
3322 *entries = 0;
3323
3324 for_each_tracing_cpu(cpu) {
3325 get_total_entries_cpu(buf, &t, &e, cpu);
3326 *total += t;
3327 *entries += e;
3328 }
3329 }
3330
trace_total_entries_cpu(struct trace_array * tr,int cpu)3331 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3332 {
3333 unsigned long total, entries;
3334
3335 if (!tr)
3336 tr = &global_trace;
3337
3338 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3339
3340 return entries;
3341 }
3342
trace_total_entries(struct trace_array * tr)3343 unsigned long trace_total_entries(struct trace_array *tr)
3344 {
3345 unsigned long total, entries;
3346
3347 if (!tr)
3348 tr = &global_trace;
3349
3350 get_total_entries(&tr->array_buffer, &total, &entries);
3351
3352 return entries;
3353 }
3354
/* Print the latency-format column legend above the trace output */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "# _------=> CPU# \n"
		    "# / _-----=> irqs-off/BH-disabled\n"
		    "# | / _----=> need-resched \n"
		    "# || / _---=> hardirq/softirq \n"
		    "# ||| / _--=> preempt-depth \n"
		    "# |||| / _-=> migrate-disable \n"
		    "# ||||| / delay \n"
		    "# cmd pid |||||| time | caller \n"
		    "# \\ / |||||| \\ | / \n");
}
3367
/* Print the entries/written/#P summary line that precedes the column headers */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long entries, total;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
3378
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)3379 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3380 unsigned int flags)
3381 {
3382 bool tgid = flags & TRACE_ITER(RECORD_TGID);
3383
3384 print_event_info(buf, m);
3385
3386 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3387 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3388 }
3389
/*
 * Print the column headers including the irq-info legend. @prec widens the
 * leading pad (12 vs 2) to make room for the TGID column when it is enabled.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = " ";
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "# %.*s|||| / delay\n", prec, space);
	seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
	seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
}
3408
/*
 * Print the verbose latency-trace banner: tracer name, latency figures,
 * the task that hit the max latency, and the critical section boundaries.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		    "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "# -----------------\n");
	seq_printf(m, "# | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "# -----------------\n");

	/* Show where the recorded critical section started and ended */
	if (data->critical_start) {
		seq_puts(m, "# => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n# => ended at: ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
3460
/*
 * When annotations are enabled, print a "CPU N buffer started" marker the
 * first time an entry from a given CPU appears in the output, so readers
 * can tell that earlier entries from that CPU were overwritten.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* This CPU was already annotated */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	/* Entries were skipped on this CPU; its effective start is elsewhere */
	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				 iter->cpu);
}
3487
#ifdef CONFIG_FTRACE_SYSCALLS
/* True when @event is a raw syscall enter or exit trace event */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
	       (event->funcs == &exit_syscall_print_funcs);

}
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
/* No syscall tracing compiled in: nothing can be a syscall event */
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
#endif /* CONFIG_FTRACE_SYSCALLS */
3503
/* Default per-entry output: context columns followed by the event's own text */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		/* "fields" option forces field-by-field output */
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3549
print_raw_fmt(struct trace_iterator * iter)3550 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3551 {
3552 struct trace_array *tr = iter->tr;
3553 struct trace_seq *s = &iter->seq;
3554 struct trace_entry *entry;
3555 struct trace_event *event;
3556
3557 entry = iter->ent;
3558
3559 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
3560 trace_seq_printf(s, "%d %d %llu ",
3561 entry->pid, iter->cpu, iter->ts);
3562
3563 if (trace_seq_has_overflowed(s))
3564 return TRACE_TYPE_PARTIAL_LINE;
3565
3566 event = ftrace_find_event(entry->type);
3567 if (event)
3568 return event->funcs->raw(iter, 0, event);
3569
3570 trace_seq_printf(s, "%d ?\n", entry->type);
3571
3572 return trace_handle_return(s);
3573 }
3574
/* Hex output format: pid/cpu/ts as hex fields, then the event's hex body */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	/* Terminate the line even when the event type was not found */
	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}
3604
print_bin_fmt(struct trace_iterator * iter)3605 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3606 {
3607 struct trace_array *tr = iter->tr;
3608 struct trace_seq *s = &iter->seq;
3609 struct trace_entry *entry;
3610 struct trace_event *event;
3611
3612 entry = iter->ent;
3613
3614 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
3615 SEQ_PUT_FIELD(s, entry->pid);
3616 SEQ_PUT_FIELD(s, iter->cpu);
3617 SEQ_PUT_FIELD(s, iter->ts);
3618 if (trace_seq_has_overflowed(s))
3619 return TRACE_TYPE_PARTIAL_LINE;
3620 }
3621
3622 event = ftrace_find_event(entry->type);
3623 return event ? event->funcs->binary(iter, 0, event) :
3624 TRACE_TYPE_HANDLED;
3625 }
3626
trace_empty(struct trace_iterator * iter)3627 int trace_empty(struct trace_iterator *iter)
3628 {
3629 struct ring_buffer_iter *buf_iter;
3630 int cpu;
3631
3632 /* If we are looking at one CPU buffer, only check that one */
3633 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3634 cpu = iter->cpu_file;
3635 buf_iter = trace_buffer_iter(iter, cpu);
3636 if (buf_iter) {
3637 if (!ring_buffer_iter_empty(buf_iter))
3638 return 0;
3639 } else {
3640 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3641 return 0;
3642 }
3643 return 1;
3644 }
3645
3646 for_each_tracing_cpu(cpu) {
3647 buf_iter = trace_buffer_iter(iter, cpu);
3648 if (buf_iter) {
3649 if (!ring_buffer_iter_empty(buf_iter))
3650 return 0;
3651 } else {
3652 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3653 return 0;
3654 }
3655 }
3656
3657 return 1;
3658 }
3659
/* Called with trace_event_read_lock() held. */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	/* Announce events lost since the previous entry, if any */
	if (iter->lost_events) {
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* Give the current tracer first chance to format the line itself */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	/* printk-msgonly: trace_printk() entries print just their message */
	if (iter->ent->type == TRACE_BPUTS &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_printk_msg_only(iter);

	/* Otherwise use the selected output format, defaulting to text */
	if (trace_flags & TRACE_ITER(BIN))
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER(HEX))
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER(RAW))
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
3710
trace_latency_header(struct seq_file * m)3711 void trace_latency_header(struct seq_file *m)
3712 {
3713 struct trace_iterator *iter = m->private;
3714 struct trace_array *tr = iter->tr;
3715
3716 /* print nothing if the buffers are empty */
3717 if (trace_empty(iter))
3718 return;
3719
3720 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3721 print_trace_header(m, iter);
3722
3723 if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
3724 print_lat_help_header(m);
3725 }
3726
trace_default_header(struct seq_file * m)3727 void trace_default_header(struct seq_file *m)
3728 {
3729 struct trace_iterator *iter = m->private;
3730 struct trace_array *tr = iter->tr;
3731 unsigned long trace_flags = tr->trace_flags;
3732
3733 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
3734 return;
3735
3736 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3737 /* print nothing if the buffers are empty */
3738 if (trace_empty(iter))
3739 return;
3740 print_trace_header(m, iter);
3741 if (!(trace_flags & TRACE_ITER(VERBOSE)))
3742 print_lat_help_header(m);
3743 } else {
3744 if (!(trace_flags & TRACE_ITER(VERBOSE))) {
3745 if (trace_flags & TRACE_ITER(IRQ_INFO))
3746 print_func_help_header_irq(iter->array_buffer,
3747 m, trace_flags);
3748 else
3749 print_func_help_header(iter->array_buffer, m,
3750 trace_flags);
3751 }
3752 }
3753 }
3754
/* Warn in the trace output when function tracing shut itself down */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "# MAY BE MISSING FUNCTION EVENTS\n");
}
3762
3763 #ifdef CONFIG_TRACER_SNAPSHOT
/* Usage text shown in the top-level snapshot file when it is empty */
static void show_snapshot_main_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "# (Doesn't have to be '2' works with any number that\n"
		    "# is not a '0' or '1')\n");
}
3773
/* Usage text shown in a per-cpu snapshot file when it is empty */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "# Takes a snapshot of the main buffer for this cpu.\n");
#else
	/* Per-cpu swap not supported by this ring buffer configuration */
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "# Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "# (Doesn't have to be '2' works with any number that\n"
		    "# is not a '0' or '1')\n");
}
3788
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3789 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3790 {
3791 if (iter->tr->allocated_snapshot)
3792 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3793 else
3794 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3795
3796 seq_puts(m, "# Snapshot commands:\n");
3797 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3798 show_snapshot_main_help(m);
3799 else
3800 show_snapshot_percpu_help(m);
3801 }
3802 #else
3803 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)3804 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3805 #endif
3806
/* seq_file ->show: emit headers before the first entry, then one line per entry */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		/* No entry yet: print the tracer banner and the header block */
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* The entry did not fit in the trace_seq; mark and move on */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 * ret is 0 if seq_file write succeeded.
		 * -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
3854
/*
 * Should be used after trace_array_get(), trace_types_lock
 * ensures that i_cdev was already initialized.
 */
static inline int tracing_get_cpu(struct inode *inode)
{
	/* i_cdev stores cpu+1 so that NULL can mean "all CPUs" */
	if (inode->i_cdev) /* See trace_create_cpu_file() */
		return (long)inode->i_cdev - 1;
	return RING_BUFFER_ALL_CPUS;
}
3865
/* seq_file operations backing reads of the "trace" file */
static const struct seq_operations tracer_seq_ops = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
3872
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement to all the allocations
 * is that it must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
static void free_trace_iter_content(struct trace_iterator *iter)
{
	/* The fmt is either NULL, allocated or points to static_fmt_buf */
	if (iter->fmt != static_fmt_buf)
		kfree(iter->fmt);

	kfree(iter->temp);
	kfree(iter->buffer_iter);
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
}
3891
/*
 * Common open path for the "trace" and "snapshot" files: allocate and set
 * up a trace_iterator (seq_file private data), attach it to the proper
 * buffer, optionally pause tracing, and start per-cpu read iterators.
 * Returns the iterator or an ERR_PTR on failure.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring_buffer_iter slot per possible CPU */
	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	/* Start a read iterator on every CPU this file covers */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
3999
tracing_open_generic(struct inode * inode,struct file * filp)4000 int tracing_open_generic(struct inode *inode, struct file *filp)
4001 {
4002 int ret;
4003
4004 ret = tracing_check_open_get_tr(NULL);
4005 if (ret)
4006 return ret;
4007
4008 filp->private_data = inode->i_private;
4009 return 0;
4010 }
4011
4012 /*
4013 * Open and update trace_array ref count.
4014 * Must have the current trace_array passed to it.
4015 */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4016 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4017 {
4018 struct trace_array *tr = inode->i_private;
4019 int ret;
4020
4021 ret = tracing_check_open_get_tr(tr);
4022 if (ret)
4023 return ret;
4024
4025 filp->private_data = inode->i_private;
4026
4027 return 0;
4028 }
4029
4030 /*
4031 * The private pointer of the inode is the trace_event_file.
4032 * Update the tr ref count associated to it.
4033 */
tracing_open_file_tr(struct inode * inode,struct file * filp)4034 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4035 {
4036 struct trace_event_file *file = inode->i_private;
4037 int ret;
4038
4039 ret = tracing_check_open_get_tr(file->tr);
4040 if (ret)
4041 return ret;
4042
4043 guard(mutex)(&event_mutex);
4044
4045 /* Fail if the file is marked for removal */
4046 if (file->flags & EVENT_FILE_FL_FREED) {
4047 trace_array_put(file->tr);
4048 return -ENODEV;
4049 } else {
4050 event_file_get(file);
4051 }
4052
4053 filp->private_data = inode->i_private;
4054
4055 return 0;
4056 }
4057
/*
 * Release counterpart of tracing_open_file_tr(): drop both the
 * trace_array reference and the event file reference taken at open.
 */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}
4067
/*
 * Release for single_open() files whose open also took trace_array and
 * event-file references (see tracing_open_file_tr()).
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}
4073
/*
 * Release for the "trace" file opened by tracing_open().
 *
 * Tears down the iterator built for read opens: finishes every per-CPU
 * ring-buffer iterator, runs the tracer's ->close() callback, restarts
 * tracing if the open paused it, and drops the trace_array reference.
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	/* Write-only opens created no iterator; just drop the reference */
	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	/* Drop the ref while trace_types_lock is still held */
	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
4111
tracing_release_generic_tr(struct inode * inode,struct file * file)4112 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4113 {
4114 struct trace_array *tr = inode->i_private;
4115
4116 trace_array_put(tr);
4117 return 0;
4118 }
4119
tracing_single_release_tr(struct inode * inode,struct file * file)4120 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4121 {
4122 struct trace_array *tr = inode->i_private;
4123
4124 trace_array_put(tr);
4125
4126 return single_release(inode, file);
4127 }
4128
static bool update_last_data_if_empty(struct trace_array *tr);

/*
 * Open callback for the "trace" file.
 *
 * Opening for write with O_TRUNC erases the buffer contents (all CPUs,
 * or only the CPU encoded in the inode).  Opening for read builds the
 * seq_file iterator via __tracing_open().
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		/* Latency tracers display the snapshot buffer; clear that one */
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* On any failure, undo the reference taken above */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
4172
4173 /*
4174 * Some tracers are not suitable for instance buffers.
4175 * A tracer is always available for the global array (toplevel)
4176 * or if it explicitly states that it is.
4177 */
4178 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4179 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4180 {
4181 /* arrays with mapped buffer range do not have snapshots */
4182 if (tr->range_addr_start && tracer_uses_snapshot(t))
4183 return false;
4184 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4185 }
4186
4187 /* Find the next tracer that this trace array may use */
4188 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4189 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4190 {
4191 while (t && !trace_ok_for_array(t, tr))
4192 t = t->next;
4193
4194 return t;
4195 }
4196
4197 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4198 t_next(struct seq_file *m, void *v, loff_t *pos)
4199 {
4200 struct trace_array *tr = m->private;
4201 struct tracer *t = v;
4202
4203 (*pos)++;
4204
4205 if (t)
4206 t = get_tracer_for_array(tr, t->next);
4207
4208 return t;
4209 }
4210
/*
 * seq_file start for "available_tracers".  Takes trace_types_lock
 * (released in t_stop()) and walks to the tracer at *pos, considering
 * only tracers usable by this trace array.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l = 0;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	for (; t && l < *pos; t = t_next(m, t, &l))
		;

	return t;
}
4225
/* seq_file stop: releases the lock taken in t_start() */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
4230
t_show(struct seq_file * m,void * v)4231 static int t_show(struct seq_file *m, void *v)
4232 {
4233 struct tracer *t = v;
4234
4235 if (!t)
4236 return 0;
4237
4238 seq_puts(m, t->name);
4239 if (t->next)
4240 seq_putc(m, ' ');
4241 else
4242 seq_putc(m, '\n');
4243
4244 return 0;
4245 }
4246
/* seq_file operations backing the "available_tracers" file */
static const struct seq_operations show_traces_seq_ops = {
	.start = t_start,
	.next = t_next,
	.stop = t_stop,
	.show = t_show,
};
4253
show_traces_open(struct inode * inode,struct file * file)4254 static int show_traces_open(struct inode *inode, struct file *file)
4255 {
4256 struct trace_array *tr = inode->i_private;
4257 struct seq_file *m;
4258 int ret;
4259
4260 ret = tracing_check_open_get_tr(tr);
4261 if (ret)
4262 return ret;
4263
4264 ret = seq_open(file, &show_traces_seq_ops);
4265 if (ret) {
4266 trace_array_put(tr);
4267 return ret;
4268 }
4269
4270 m = file->private_data;
4271 m->private = tr;
4272
4273 return 0;
4274 }
4275
tracing_seq_release(struct inode * inode,struct file * file)4276 static int tracing_seq_release(struct inode *inode, struct file *file)
4277 {
4278 struct trace_array *tr = inode->i_private;
4279
4280 trace_array_put(tr);
4281 return seq_release(inode, file);
4282 }
4283
/*
 * Writes to the "trace" file are accepted but discarded.  The write
 * path exists so that opening with O_TRUNC can clear the buffer
 * (see tracing_open()); the data itself has no meaning.
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
4290
/*
 * lseek for tracing files: read opens delegate to seq_lseek();
 * write-only opens simply reset the file position to zero.
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	if (file->f_mode & FMODE_READ)
		return seq_lseek(file, offset, whence);

	file->f_pos = 0;
	return 0;
}
4302
/* File operations for the "trace" file */
static const struct file_operations tracing_fops = {
	.open = tracing_open,
	.read = seq_read,
	.read_iter = seq_read_iter,
	.splice_read = copy_splice_read,
	.write = tracing_write_stub,
	.llseek = tracing_lseek,
	.release = tracing_release,
};
4312
/* File operations for the "available_tracers" file */
static const struct file_operations show_traces_fops = {
	.open = show_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_seq_release,
};
4319
/*
 * Read handler for "tracing_cpumask": formats the array's tracing
 * cpumask as a bitmap string ("%*pb").
 */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	char *mask_str __free(kfree) = NULL;	/* kfree()d automatically on return */
	int len;

	/* First snprintf() pass computes the required length (plus NUL) */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	/* The caller's buffer must take the whole string in one read */
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}
4341
/*
 * Install a new tracing cpumask on @tr.
 *
 * For each CPU whose mask bit flips, ring-buffer recording is disabled
 * (leaving the mask) or re-enabled (entering it).  The flip is done
 * under tr->max_lock with interrupts disabled so it is atomic with
 * respect to other max_lock holders.
 *
 * Returns 0 on success, -EINVAL if @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
		    !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
		    cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
4379
4380 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4381 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4382 size_t count, loff_t *ppos)
4383 {
4384 struct trace_array *tr = file_inode(filp)->i_private;
4385 cpumask_var_t tracing_cpumask_new;
4386 int err;
4387
4388 if (count == 0 || count > KMALLOC_MAX_SIZE)
4389 return -EINVAL;
4390
4391 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4392 return -ENOMEM;
4393
4394 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4395 if (err)
4396 goto err_free;
4397
4398 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4399 if (err)
4400 goto err_free;
4401
4402 free_cpumask_var(tracing_cpumask_new);
4403
4404 return count;
4405
4406 err_free:
4407 free_cpumask_var(tracing_cpumask_new);
4408
4409 return err;
4410 }
4411
/* File operations for the "tracing_cpumask" file */
static const struct file_operations tracing_cpumask_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_cpumask_read,
	.write = tracing_cpumask_write,
	.release = tracing_release_generic_tr,
	.llseek = generic_file_llseek,
};
4419
tracing_trace_options_show(struct seq_file * m,void * v)4420 static int tracing_trace_options_show(struct seq_file *m, void *v)
4421 {
4422 struct tracer_opt *trace_opts;
4423 struct trace_array *tr = m->private;
4424 struct tracer_flags *flags;
4425 u32 tracer_flags;
4426 int i;
4427
4428 guard(mutex)(&trace_types_lock);
4429
4430 for (i = 0; trace_options[i]; i++) {
4431 if (tr->trace_flags & (1ULL << i))
4432 seq_printf(m, "%s\n", trace_options[i]);
4433 else
4434 seq_printf(m, "no%s\n", trace_options[i]);
4435 }
4436
4437 flags = tr->current_trace_flags;
4438 if (!flags || !flags->opts)
4439 return 0;
4440
4441 tracer_flags = flags->val;
4442 trace_opts = flags->opts;
4443
4444 for (i = 0; trace_opts[i].name; i++) {
4445 if (tracer_flags & trace_opts[i].bit)
4446 seq_printf(m, "%s\n", trace_opts[i].name);
4447 else
4448 seq_printf(m, "no%s\n", trace_opts[i].name);
4449 }
4450
4451 return 0;
4452 }
4453
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)4454 static int __set_tracer_option(struct trace_array *tr,
4455 struct tracer_flags *tracer_flags,
4456 struct tracer_opt *opts, int neg)
4457 {
4458 struct tracer *trace = tracer_flags->trace;
4459 int ret = 0;
4460
4461 if (trace->set_flag)
4462 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4463 if (ret)
4464 return ret;
4465
4466 if (neg)
4467 tracer_flags->val &= ~opts->bit;
4468 else
4469 tracer_flags->val |= opts->bit;
4470 return 0;
4471 }
4472
4473 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)4474 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4475 {
4476 struct tracer_flags *tracer_flags = tr->current_trace_flags;
4477 struct tracer_opt *opts = NULL;
4478 int i;
4479
4480 if (!tracer_flags || !tracer_flags->opts)
4481 return 0;
4482
4483 for (i = 0; tracer_flags->opts[i].name; i++) {
4484 opts = &tracer_flags->opts[i];
4485
4486 if (strcmp(cmp, opts->name) == 0)
4487 return __set_tracer_option(tr, tracer_flags, opts, neg);
4488 }
4489
4490 return -EINVAL;
4491 }
4492
4493 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u64 mask,int set)4494 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
4495 {
4496 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
4497 return -1;
4498
4499 return 0;
4500 }
4501
/*
 * Set or clear a single core trace option bit (TRACE_ITER(*)) on @tr.
 *
 * The current tracer may veto the change via ->flag_changed().  After
 * the bit is updated, the flag-specific side effects are applied
 * (cmdline/tgid recording, fork following, overwrite mode, printk
 * hooks, ...).
 *
 * Returns 0 on success, -EINVAL if the change is rejected, -ENOMEM if
 * a required allocation fails.
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	/* These flags modify state protected by event_mutex */
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* Flags that need special handling before the bit is flipped */
	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * It's flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set.
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Flag-specific side effects, applied after the bit is updated */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		/* Roll the flag back if the tgid map cannot be allocated */
		if (trace_alloc_tgid_map() < 0) {
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}
4596
/*
 * Parse one option token ("opt" or "noopt") and apply it to @tr.
 * Core trace options are tried first; unrecognized names fall back to
 * the current tracer's private options.
 *
 * Returns a negative errno on failure, otherwise the underlying
 * set_tracer_flag()/set_tracer_option() result.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix means the option is being cleared */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	/* event_mutex before trace_types_lock: matches set_tracer_flag() */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4635
/*
 * Apply the comma-separated option list from the boot command line
 * (stored in trace_boot_options_buf) to the global trace array.
 */
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		/* strsep() NUL-terminates each token in place */
		option = strsep(&buf, ",");

		if (!option)
			break;

		if (*option)
			trace_set_options(&global_trace, option);

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}
4655
4656 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)4657 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4658 size_t cnt, loff_t *ppos)
4659 {
4660 struct seq_file *m = filp->private_data;
4661 struct trace_array *tr = m->private;
4662 char buf[64];
4663 int ret;
4664
4665 if (cnt >= sizeof(buf))
4666 return -EINVAL;
4667
4668 if (copy_from_user(buf, ubuf, cnt))
4669 return -EFAULT;
4670
4671 buf[cnt] = 0;
4672
4673 ret = trace_set_options(tr, buf);
4674 if (ret < 0)
4675 return ret;
4676
4677 *ppos += cnt;
4678
4679 return cnt;
4680 }
4681
tracing_trace_options_open(struct inode * inode,struct file * file)4682 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4683 {
4684 struct trace_array *tr = inode->i_private;
4685 int ret;
4686
4687 ret = tracing_check_open_get_tr(tr);
4688 if (ret)
4689 return ret;
4690
4691 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4692 if (ret < 0)
4693 trace_array_put(tr);
4694
4695 return ret;
4696 }
4697
/* File operations for the "trace_options" file */
static const struct file_operations tracing_iter_fops = {
	.open = tracing_trace_options_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_single_release_tr,
	.write = tracing_trace_options_write,
};
4705
4706 static const char readme_msg[] =
4707 "tracing mini-HOWTO:\n\n"
4708 "By default tracefs removes all OTH file permission bits.\n"
4709 "When mounting tracefs an optional group id can be specified\n"
4710 "which adds the group to every directory and file in tracefs:\n\n"
4711 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4712 "# echo 0 > tracing_on : quick way to disable tracing\n"
4713 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4714 " Important files:\n"
4715 " trace\t\t\t- The static contents of the buffer\n"
4716 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4717 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4718 " current_tracer\t- function and latency tracers\n"
4719 " available_tracers\t- list of configured tracers for current_tracer\n"
4720 " error_log\t- error log for failed commands (that support it)\n"
4721 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4722 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4723 " trace_clock\t\t- change the clock used to order events\n"
4724 " local: Per cpu clock but may not be synced across CPUs\n"
4725 " global: Synced across CPUs but slows tracing down.\n"
4726 " counter: Not a clock, but just an increment\n"
4727 " uptime: Jiffy counter from time of boot\n"
4728 " perf: Same clock that perf events use\n"
4729 #ifdef CONFIG_X86_64
4730 " x86-tsc: TSC cycle counter\n"
4731 #endif
4732 "\n timestamp_mode\t- view the mode used to timestamp events\n"
4733 " delta: Delta difference against a buffer-wide timestamp\n"
4734 " absolute: Absolute (standalone) timestamp\n"
4735 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4736 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4737 " tracing_cpumask\t- Limit which CPUs to trace\n"
4738 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4739 "\t\t\t Remove sub-buffer with rmdir\n"
4740 " trace_options\t\t- Set format or modify how tracing happens\n"
4741 "\t\t\t Disable an option by prefixing 'no' to the\n"
4742 "\t\t\t option name\n"
4743 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4744 #ifdef CONFIG_DYNAMIC_FTRACE
4745 "\n available_filter_functions - list of functions that can be filtered on\n"
4746 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4747 "\t\t\t functions\n"
4748 "\t accepts: func_full_name or glob-matching-pattern\n"
4749 "\t modules: Can select a group via module\n"
4750 "\t Format: :mod:<module-name>\n"
4751 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4752 "\t triggers: a command to perform when function is hit\n"
4753 "\t Format: <function>:<trigger>[:count]\n"
4754 "\t trigger: traceon, traceoff\n"
4755 "\t\t enable_event:<system>:<event>\n"
4756 "\t\t disable_event:<system>:<event>\n"
4757 #ifdef CONFIG_STACKTRACE
4758 "\t\t stacktrace\n"
4759 #endif
4760 #ifdef CONFIG_TRACER_SNAPSHOT
4761 "\t\t snapshot\n"
4762 #endif
4763 "\t\t dump\n"
4764 "\t\t cpudump\n"
4765 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4766 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4767 "\t The first one will disable tracing every time do_fault is hit\n"
4768 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4769 "\t The first time do trap is hit and it disables tracing, the\n"
4770 "\t counter will decrement to 2. If tracing is already disabled,\n"
4771 "\t the counter will not decrement. It only decrements when the\n"
4772 "\t trigger did work\n"
4773 "\t To remove trigger without count:\n"
4774 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4775 "\t To remove trigger with a count:\n"
4776 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4777 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4778 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4779 "\t modules: Can select a group via module command :mod:\n"
4780 "\t Does not accept triggers\n"
4781 #endif /* CONFIG_DYNAMIC_FTRACE */
4782 #ifdef CONFIG_FUNCTION_TRACER
4783 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4784 "\t\t (function)\n"
4785 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4786 "\t\t (function)\n"
4787 #endif
4788 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4789 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4790 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4791 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4792 #endif
4793 #ifdef CONFIG_TRACER_SNAPSHOT
4794 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4795 "\t\t\t snapshot buffer. Read the contents for more\n"
4796 "\t\t\t information\n"
4797 #endif
4798 #ifdef CONFIG_STACK_TRACER
4799 " stack_trace\t\t- Shows the max stack trace when active\n"
4800 " stack_max_size\t- Shows current max stack size that was traced\n"
4801 "\t\t\t Write into this file to reset the max size (trigger a\n"
4802 "\t\t\t new trace)\n"
4803 #ifdef CONFIG_DYNAMIC_FTRACE
4804 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4805 "\t\t\t traces\n"
4806 #endif
4807 #endif /* CONFIG_STACK_TRACER */
4808 #ifdef CONFIG_DYNAMIC_EVENTS
4809 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4810 "\t\t\t Write into this file to define/undefine new trace events.\n"
4811 #endif
4812 #ifdef CONFIG_KPROBE_EVENTS
4813 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4814 "\t\t\t Write into this file to define/undefine new trace events.\n"
4815 #endif
4816 #ifdef CONFIG_UPROBE_EVENTS
4817 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4818 "\t\t\t Write into this file to define/undefine new trace events.\n"
4819 #endif
4820 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4821 defined(CONFIG_FPROBE_EVENTS)
4822 "\t accepts: event-definitions (one definition per line)\n"
4823 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4824 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4825 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4826 #endif
4827 #ifdef CONFIG_FPROBE_EVENTS
4828 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4829 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4830 #endif
4831 #ifdef CONFIG_HIST_TRIGGERS
4832 "\t s:[synthetic/]<event> <field> [<field>]\n"
4833 #endif
4834 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4835 "\t -:[<group>/][<event>]\n"
4836 #ifdef CONFIG_KPROBE_EVENTS
4837 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4838 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4839 #endif
4840 #ifdef CONFIG_UPROBE_EVENTS
4841 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4842 #endif
4843 "\t args: <name>=fetcharg[:type]\n"
4844 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4845 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4846 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4847 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4848 "\t <argname>[->field[->field|.field...]],\n"
4849 #endif
4850 #else
4851 "\t $stack<index>, $stack, $retval, $comm,\n"
4852 #endif
4853 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4854 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
4855 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4856 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4857 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4858 #ifdef CONFIG_HIST_TRIGGERS
4859 "\t field: <stype> <name>;\n"
4860 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4861 "\t [unsigned] char/int/long\n"
4862 #endif
4863 "\t efield: For event probes ('e' types), the field is on of the fields\n"
4864 "\t of the <attached-group>/<attached-event>.\n"
4865 #endif
4866 " set_event\t\t- Enables events by name written into it\n"
4867 "\t\t\t Can enable module events via: :mod:<module>\n"
4868 " events/\t\t- Directory containing all trace event subsystems:\n"
4869 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4870 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4871 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4872 "\t\t\t events\n"
4873 " filter\t\t- If set, only events passing filter are traced\n"
4874 " events/<system>/<event>/\t- Directory containing control files for\n"
4875 "\t\t\t <event>:\n"
4876 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4877 " filter\t\t- If set, only events passing filter are traced\n"
4878 " trigger\t\t- If set, a command to perform when event is hit\n"
4879 "\t Format: <trigger>[:count][if <filter>]\n"
4880 "\t trigger: traceon, traceoff\n"
4881 "\t enable_event:<system>:<event>\n"
4882 "\t disable_event:<system>:<event>\n"
4883 #ifdef CONFIG_HIST_TRIGGERS
4884 "\t enable_hist:<system>:<event>\n"
4885 "\t disable_hist:<system>:<event>\n"
4886 #endif
4887 #ifdef CONFIG_STACKTRACE
4888 "\t\t stacktrace\n"
4889 #endif
4890 #ifdef CONFIG_TRACER_SNAPSHOT
4891 "\t\t snapshot\n"
4892 #endif
4893 #ifdef CONFIG_HIST_TRIGGERS
4894 "\t\t hist (see below)\n"
4895 #endif
4896 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4897 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4898 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4899 "\t events/block/block_unplug/trigger\n"
4900 "\t The first disables tracing every time block_unplug is hit.\n"
4901 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4902 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4903 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4904 "\t Like function triggers, the counter is only decremented if it\n"
4905 "\t enabled or disabled tracing.\n"
4906 "\t To remove a trigger without a count:\n"
4907 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4908 "\t To remove a trigger with a count:\n"
4909 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4910 "\t Filters can be ignored when removing a trigger.\n"
4911 #ifdef CONFIG_HIST_TRIGGERS
4912 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4913 "\t Format: hist:keys=<field1[,field2,...]>\n"
4914 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4915 "\t [:values=<field1[,field2,...]>]\n"
4916 "\t [:sort=<field1[,field2,...]>]\n"
4917 "\t [:size=#entries]\n"
4918 "\t [:pause][:continue][:clear]\n"
4919 "\t [:name=histname1]\n"
4920 "\t [:nohitcount]\n"
4921 "\t [:<handler>.<action>]\n"
4922 "\t [if <filter>]\n\n"
4923 "\t Note, special fields can be used as well:\n"
4924 "\t common_timestamp - to record current timestamp\n"
4925 "\t common_cpu - to record the CPU the event happened on\n"
4926 "\n"
4927 "\t A hist trigger variable can be:\n"
4928 "\t - a reference to a field e.g. x=current_timestamp,\n"
4929 "\t - a reference to another variable e.g. y=$x,\n"
4930 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
4931 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4932 "\n"
4933 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4934 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
4935 "\t variable reference, field or numeric literal.\n"
4936 "\n"
4937 "\t When a matching event is hit, an entry is added to a hash\n"
4938 "\t table using the key(s) and value(s) named, and the value of a\n"
4939 "\t sum called 'hitcount' is incremented. Keys and values\n"
4940 "\t correspond to fields in the event's format description. Keys\n"
4941 "\t can be any field, or the special string 'common_stacktrace'.\n"
4942 "\t Compound keys consisting of up to two fields can be specified\n"
4943 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4944 "\t fields. Sort keys consisting of up to two fields can be\n"
4945 "\t specified using the 'sort' keyword. The sort direction can\n"
4946 "\t be modified by appending '.descending' or '.ascending' to a\n"
4947 "\t sort field. The 'size' parameter can be used to specify more\n"
4948 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4949 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4950 "\t its histogram data will be shared with other triggers of the\n"
4951 "\t same name, and trigger hits will update this common data.\n\n"
4952 "\t Reading the 'hist' file for the event will dump the hash\n"
4953 "\t table in its entirety to stdout. If there are multiple hist\n"
4954 "\t triggers attached to an event, there will be a table for each\n"
4955 "\t trigger in the output. The table displayed for a named\n"
4956 "\t trigger will be the same as any other instance having the\n"
4957 "\t same name. The default format used to display a given field\n"
4958 "\t can be modified by appending any of the following modifiers\n"
4959 "\t to the field name, as applicable:\n\n"
4960 "\t .hex display a number as a hex value\n"
4961 "\t .sym display an address as a symbol\n"
4962 "\t .sym-offset display an address as a symbol and offset\n"
4963 "\t .execname display a common_pid as a program name\n"
4964 "\t .syscall display a syscall id as a syscall name\n"
4965 "\t .log2 display log2 value rather than raw number\n"
4966 "\t .buckets=size display values in groups of size rather than raw number\n"
4967 "\t .usecs display a common_timestamp in microseconds\n"
4968 "\t .percent display a number of percentage value\n"
4969 "\t .graph display a bar-graph of a value\n\n"
4970 "\t The 'pause' parameter can be used to pause an existing hist\n"
4971 "\t trigger or to start a hist trigger but not log any events\n"
4972 "\t until told to do so. 'continue' can be used to start or\n"
4973 "\t restart a paused hist trigger.\n\n"
4974 "\t The 'clear' parameter will clear the contents of a running\n"
4975 "\t hist trigger and leave its current paused/active state\n"
4976 "\t unchanged.\n\n"
4977 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4978 "\t raw hitcount in the histogram.\n\n"
4979 "\t The enable_hist and disable_hist triggers can be used to\n"
4980 "\t have one event conditionally start and stop another event's\n"
4981 "\t already-attached hist trigger. The syntax is analogous to\n"
4982 "\t the enable_event and disable_event triggers.\n\n"
4983 "\t Hist trigger handlers and actions are executed whenever a\n"
4984 "\t a histogram entry is added or updated. They take the form:\n\n"
4985 "\t <handler>.<action>\n\n"
4986 "\t The available handlers are:\n\n"
4987 "\t onmatch(matching.event) - invoke on addition or update\n"
4988 "\t onmax(var) - invoke if var exceeds current max\n"
4989 "\t onchange(var) - invoke action if var changes\n\n"
4990 "\t The available actions are:\n\n"
4991 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
4992 "\t save(field,...) - save current event fields\n"
4993 #ifdef CONFIG_TRACER_SNAPSHOT
4994 "\t snapshot() - snapshot the trace buffer\n\n"
4995 #endif
4996 #ifdef CONFIG_SYNTH_EVENTS
4997 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
4998 "\t Write into this file to define/undefine new synthetic events.\n"
4999 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5000 #endif
5001 #endif
5002 ;
5003
5004 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5005 tracing_readme_read(struct file *filp, char __user *ubuf,
5006 size_t cnt, loff_t *ppos)
5007 {
5008 return simple_read_from_buffer(ubuf, cnt, ppos,
5009 readme_msg, strlen(readme_msg));
5010 }
5011
/* File operations for the read-only "README" help file. */
static const struct file_operations tracing_readme_fops = {
	.open = tracing_open_generic,
	.read = tracing_readme_read,
	.llseek = generic_file_llseek,
};
5017
5018 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5019 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5020 update_eval_map(union trace_eval_map_item *ptr)
5021 {
5022 if (!ptr->map.eval_string) {
5023 if (ptr->tail.next) {
5024 ptr = ptr->tail.next;
5025 /* Set ptr to the next real item (skip head) */
5026 ptr++;
5027 } else
5028 return NULL;
5029 }
5030 return ptr;
5031 }
5032
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5033 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5034 {
5035 union trace_eval_map_item *ptr = v;
5036
5037 /*
5038 * Paranoid! If ptr points to end, we don't want to increment past it.
5039 * This really should never happen.
5040 */
5041 (*pos)++;
5042 ptr = update_eval_map(ptr);
5043 if (WARN_ON_ONCE(!ptr))
5044 return NULL;
5045
5046 ptr++;
5047 ptr = update_eval_map(ptr);
5048
5049 return ptr;
5050 }
5051
/*
 * seq_file .start: take the eval map lock (released in eval_map_stop())
 * and walk forward to position *pos.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t l;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	if (item)
		item++;		/* skip the head item of the first array */

	for (l = 0; item && l < *pos; )
		item = eval_map_next(m, item, &l);

	return item;
}
5069
/* seq_file .stop: release the lock taken in eval_map_start(). */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
5074
/* seq_file .show: print one mapping as "<string> <value> (<system>)". */
static int eval_map_show(struct seq_file *m, void *v)
{
	union trace_eval_map_item *ptr = v;

	seq_printf(m, "%s %ld (%s)\n",
		   ptr->map.eval_string, ptr->map.eval_value,
		   ptr->map.system);

	return 0;
}
5085
/* Iterator operations for the "eval_map" seq_file. */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start = eval_map_start,
	.next = eval_map_next,
	.stop = eval_map_stop,
	.show = eval_map_show,
};
5092
tracing_eval_map_open(struct inode * inode,struct file * filp)5093 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5094 {
5095 int ret;
5096
5097 ret = tracing_check_open_get_tr(NULL);
5098 if (ret)
5099 return ret;
5100
5101 return seq_open(filp, &tracing_eval_map_seq_ops);
5102 }
5103
/* File operations for the seq_file-backed "eval_map" file. */
static const struct file_operations tracing_eval_map_fops = {
	.open = tracing_eval_map_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
5110
/*
 * Given the head item of an eval map array, return its tail item.
 * Layout is: head, head.length map entries, tail.
 */
static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
{
	/* Return tail of array given the head */
	return ptr + ptr->head.length + 1;
}
5117
/*
 * Copy the eval maps in [@start, @start + @len) into a freshly allocated
 * array (head + @len entries + zeroed tail) and link it onto the global
 * trace_eval_maps list so the "eval_map" file can show it.
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Walk tail pointers to the last array and chain onto it. */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	/* Fill the head item, then copy the maps themselves. */
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* Zeroed tail terminates this array (tail.next == NULL). */
	memset(map_array, 0, sizeof(*map_array));
}
5165
/* Create the read-only "eval_map" file under @d_tracer. */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
5171
5172 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Stubs used when CONFIG_TRACE_EVAL_MAP_FILE is not set. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
5176 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5177
/*
 * Update all trace events with the eval maps in [@start, @start + @len)
 * and, when @len is positive, also publish them via the eval_map file.
 */
static void
trace_event_update_with_eval_map(struct module *mod,
				 struct trace_eval_map **start,
				 int len)
{
	struct trace_event_map **map;

	/* Always run sanitizer only if btf_type_tag attr exists. */
	if (len <= 0) {
		/*
		 * NOTE(review): with no map entries this only proceeds for
		 * the BTF type-tag sanitizer configuration -- confirm intent.
		 */
		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
		      __has_attribute(btf_type_tag)))
			return;
	}

	map = start;

	trace_event_update_all(map, len);

	/* Nothing to insert into the eval_map file without entries. */
	if (len <= 0)
		return;

	trace_insert_eval_map_file(mod, start, len);
}
5202
5203 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5204 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5205 size_t cnt, loff_t *ppos)
5206 {
5207 struct trace_array *tr = filp->private_data;
5208 char buf[MAX_TRACER_SIZE+2];
5209 int r;
5210
5211 scoped_guard(mutex, &trace_types_lock) {
5212 r = sprintf(buf, "%s\n", tr->current_trace->name);
5213 }
5214
5215 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5216 }
5217
tracer_init(struct tracer * t,struct trace_array * tr)5218 int tracer_init(struct tracer *t, struct trace_array *tr)
5219 {
5220 tracing_reset_online_cpus(&tr->array_buffer);
5221 update_last_data_if_empty(tr);
5222 return t->init(tr);
5223 }
5224
/* Record @val as the per-cpu entry count on every tracing CPU of @buf. */
static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
5232
update_buffer_entries(struct array_buffer * buf,int cpu)5233 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5234 {
5235 if (cpu == RING_BUFFER_ALL_CPUS) {
5236 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5237 } else {
5238 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5239 }
5240 }
5241
5242 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Resize @trace_buf to match the recorded per-cpu entry counts of
 * @size_buf, for a single CPU or all of them.  On success the recorded
 * counts of @trace_buf are updated to match; returns a negative error
 * from ring_buffer_resize() otherwise.
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
			 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
5268 #endif /* CONFIG_TRACER_SNAPSHOT */
5269
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5270 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5271 unsigned long size, int cpu)
5272 {
5273 int ret;
5274
5275 /*
5276 * If kernel or user changes the size of the ring buffer
5277 * we use the size that was given, and we can forget about
5278 * expanding it later.
5279 */
5280 trace_set_ring_buffer_expanded(tr);
5281
5282 /* May be called before buffers are initialized */
5283 if (!tr->array_buffer.buffer)
5284 return 0;
5285
5286 /* Do not allow tracing while resizing ring buffer */
5287 tracing_stop_tr(tr);
5288
5289 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5290 if (ret < 0)
5291 goto out_start;
5292
5293 #ifdef CONFIG_TRACER_SNAPSHOT
5294 if (!tr->allocated_snapshot)
5295 goto out;
5296
5297 ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
5298 if (ret < 0) {
5299 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5300 &tr->array_buffer, cpu);
5301 if (r < 0) {
5302 /*
5303 * AARGH! We are left with different
5304 * size max buffer!!!!
5305 * The max buffer is our "snapshot" buffer.
5306 * When a tracer needs a snapshot (one of the
5307 * latency tracers), it swaps the max buffer
5308 * with the saved snap shot. We succeeded to
5309 * update the size of the main buffer, but failed to
5310 * update the size of the max buffer. But when we tried
5311 * to reset the main buffer to the original size, we
5312 * failed there too. This is very unlikely to
5313 * happen, but if it does, warn and kill all
5314 * tracing.
5315 */
5316 WARN_ON(1);
5317 tracing_disabled = 1;
5318 }
5319 goto out_start;
5320 }
5321
5322 update_buffer_entries(&tr->snapshot_buffer, cpu);
5323
5324 out:
5325 #endif /* CONFIG_TRACER_SNAPSHOT */
5326
5327 update_buffer_entries(&tr->array_buffer, cpu);
5328 out_start:
5329 tracing_start_tr(tr);
5330 return ret;
5331 }
5332
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5333 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5334 unsigned long size, int cpu_id)
5335 {
5336 guard(mutex)(&trace_types_lock);
5337
5338 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5339 /* make sure, this cpu is enabled in the mask */
5340 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5341 return -EINVAL;
5342 }
5343
5344 return __tracing_resize_ring_buffer(tr, size, cpu_id);
5345 }
5346
/* One module's base text address and name, recorded in the scratch area. */
struct trace_mod_entry {
	unsigned long mod_addr;
	char mod_name[MODULE_NAME_LEN];
};

/*
 * Metadata kept in a persistent ring buffer's scratch area: the clock in
 * use, the kernel text address, and a sorted array of module entries so a
 * later boot can translate previous-boot addresses.
 */
struct trace_scratch {
	unsigned int clock_id;
	unsigned long text_addr;
	unsigned long nr_entries;
	struct trace_mod_entry entries[];
};

/* Serializes updates to the scratch module entries. */
static DEFINE_MUTEX(scratch_mutex);
5360
cmp_mod_entry(const void * key,const void * pivot)5361 static int cmp_mod_entry(const void *key, const void *pivot)
5362 {
5363 unsigned long addr = (unsigned long)key;
5364 const struct trace_mod_entry *ent = pivot;
5365
5366 if (addr < ent[0].mod_addr)
5367 return -1;
5368
5369 return addr >= ent[1].mod_addr;
5370 }
5371
5372 /**
5373 * trace_adjust_address() - Adjust prev boot address to current address.
5374 * @tr: Persistent ring buffer's trace_array.
5375 * @addr: Address in @tr which is adjusted.
5376 */
trace_adjust_address(struct trace_array * tr,unsigned long addr)5377 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
5378 {
5379 struct trace_module_delta *module_delta;
5380 struct trace_scratch *tscratch;
5381 struct trace_mod_entry *entry;
5382 unsigned long raddr;
5383 int idx = 0, nr_entries;
5384
5385 /* If we don't have last boot delta, return the address */
5386 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5387 return addr;
5388
5389 /* tr->module_delta must be protected by rcu. */
5390 guard(rcu)();
5391 tscratch = tr->scratch;
5392 /* if there is no tscrach, module_delta must be NULL. */
5393 module_delta = READ_ONCE(tr->module_delta);
5394 if (!module_delta || !tscratch->nr_entries ||
5395 tscratch->entries[0].mod_addr > addr) {
5396 raddr = addr + tr->text_delta;
5397 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
5398 is_kernel_rodata(raddr) ? raddr : addr;
5399 }
5400
5401 /* Note that entries must be sorted. */
5402 nr_entries = tscratch->nr_entries;
5403 if (nr_entries == 1 ||
5404 tscratch->entries[nr_entries - 1].mod_addr < addr)
5405 idx = nr_entries - 1;
5406 else {
5407 entry = __inline_bsearch((void *)addr,
5408 tscratch->entries,
5409 nr_entries - 1,
5410 sizeof(tscratch->entries[0]),
5411 cmp_mod_entry);
5412 if (entry)
5413 idx = entry - tscratch->entries;
5414 }
5415
5416 return addr + module_delta->delta[idx];
5417 }
5418
#ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: append @mod's text base address and
 * name to @tr's scratch area.  Returns -1 when there is no scratch area
 * or it is full, 0 on success.
 */
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned int size;

	tscratch = tr->scratch;
	if (!tscratch)
		return -1;
	size = tr->scratch_size;

	/* Would one more entry overflow the scratch area? */
	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
		return -1;

	entry = &tscratch->entries[tscratch->nr_entries];

	tscratch->nr_entries++;

	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(entry->mod_name, mod->name);

	return 0;
}
#else
/* No modules: nothing to record. */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
#endif
5450
update_last_data(struct trace_array * tr)5451 static void update_last_data(struct trace_array *tr)
5452 {
5453 struct trace_module_delta *module_delta;
5454 struct trace_scratch *tscratch;
5455
5456 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
5457 return;
5458
5459 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5460 return;
5461
5462 /* Only if the buffer has previous boot data clear and update it. */
5463 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
5464
5465 /* Reset the module list and reload them */
5466 if (tr->scratch) {
5467 struct trace_scratch *tscratch = tr->scratch;
5468
5469 tscratch->clock_id = tr->clock_id;
5470 memset(tscratch->entries, 0,
5471 flex_array_size(tscratch, entries, tscratch->nr_entries));
5472 tscratch->nr_entries = 0;
5473
5474 guard(mutex)(&scratch_mutex);
5475 module_for_each_mod(save_mod, tr);
5476 }
5477
5478 /*
5479 * Need to clear all CPU buffers as there cannot be events
5480 * from the previous boot mixed with events with this boot
5481 * as that will cause a confusing trace. Need to clear all
5482 * CPU buffers, even for those that may currently be offline.
5483 */
5484 tracing_reset_all_cpus(&tr->array_buffer);
5485
5486 /* Using current data now */
5487 tr->text_delta = 0;
5488
5489 if (!tr->scratch)
5490 return;
5491
5492 tscratch = tr->scratch;
5493 module_delta = READ_ONCE(tr->module_delta);
5494 WRITE_ONCE(tr->module_delta, NULL);
5495 kfree_rcu(module_delta, rcu);
5496
5497 /* Set the persistent ring buffer meta data to this address */
5498 tscratch->text_addr = (unsigned long)_text;
5499 }
5500
5501 /**
5502 * tracing_update_buffers - used by tracing facility to expand ring buffers
5503 * @tr: The tracing instance
5504 *
5505 * To save on memory when the tracing is never used on a system with it
5506 * configured in. The ring buffers are set to a minimum size. But once
5507 * a user starts to use the tracing facility, then they need to grow
5508 * to their default size.
5509 *
5510 * This function is to be called when a tracer is about to be used.
5511 */
tracing_update_buffers(struct trace_array * tr)5512 int tracing_update_buffers(struct trace_array *tr)
5513 {
5514 int ret = 0;
5515
5516 if (!tr)
5517 tr = &global_trace;
5518
5519 guard(mutex)(&trace_types_lock);
5520
5521 update_last_data(tr);
5522
5523 if (!tr->ring_buffer_expanded)
5524 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5525 RING_BUFFER_ALL_CPUS);
5526 return ret;
5527 }
5528
5529 /*
5530 * Used to clear out the tracer before deletion of an instance.
5531 * Must have trace_types_lock held.
5532 */
tracing_set_nop(struct trace_array * tr)5533 static void tracing_set_nop(struct trace_array *tr)
5534 {
5535 if (tr->current_trace == &nop_trace)
5536 return;
5537
5538 tr->current_trace->enabled--;
5539
5540 if (tr->current_trace->reset)
5541 tr->current_trace->reset(tr);
5542
5543 tr->current_trace = &nop_trace;
5544 tr->current_trace_flags = nop_trace.flags;
5545 }
5546
/* NOTE(review): set/read outside this chunk; presumably records that
 * tracer options were modified -- confirm against the options code. */
static bool tracer_options_updated;
5548
tracing_set_tracer(struct trace_array * tr,const char * buf)5549 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5550 {
5551 struct tracer *trace = NULL;
5552 struct tracers *t;
5553 bool had_max_tr;
5554 int ret;
5555
5556 guard(mutex)(&trace_types_lock);
5557
5558 update_last_data(tr);
5559
5560 if (!tr->ring_buffer_expanded) {
5561 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5562 RING_BUFFER_ALL_CPUS);
5563 if (ret < 0)
5564 return ret;
5565 ret = 0;
5566 }
5567
5568 list_for_each_entry(t, &tr->tracers, list) {
5569 if (strcmp(t->tracer->name, buf) == 0) {
5570 trace = t->tracer;
5571 break;
5572 }
5573 }
5574 if (!trace)
5575 return -EINVAL;
5576
5577 if (trace == tr->current_trace)
5578 return 0;
5579
5580 #ifdef CONFIG_TRACER_SNAPSHOT
5581 if (tracer_uses_snapshot(trace)) {
5582 local_irq_disable();
5583 arch_spin_lock(&tr->max_lock);
5584 ret = tr->cond_snapshot ? -EBUSY : 0;
5585 arch_spin_unlock(&tr->max_lock);
5586 local_irq_enable();
5587 if (ret)
5588 return ret;
5589 }
5590 #endif
5591 /* Some tracers won't work on kernel command line */
5592 if (system_state < SYSTEM_RUNNING && trace->noboot) {
5593 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5594 trace->name);
5595 return -EINVAL;
5596 }
5597
5598 /* Some tracers are only allowed for the top level buffer */
5599 if (!trace_ok_for_array(trace, tr))
5600 return -EINVAL;
5601
5602 /* If trace pipe files are being read, we can't change the tracer */
5603 if (tr->trace_ref)
5604 return -EBUSY;
5605
5606 trace_branch_disable();
5607
5608 tr->current_trace->enabled--;
5609
5610 if (tr->current_trace->reset)
5611 tr->current_trace->reset(tr);
5612
5613 had_max_tr = tracer_uses_snapshot(tr->current_trace);
5614
5615 /* Current trace needs to be nop_trace before synchronize_rcu */
5616 tr->current_trace = &nop_trace;
5617 tr->current_trace_flags = nop_trace.flags;
5618
5619 if (had_max_tr && !tracer_uses_snapshot(trace)) {
5620 /*
5621 * We need to make sure that the update_max_tr sees that
5622 * current_trace changed to nop_trace to keep it from
5623 * swapping the buffers after we resize it.
5624 * The update_max_tr is called from interrupts disabled
5625 * so a synchronized_sched() is sufficient.
5626 */
5627 synchronize_rcu();
5628 free_snapshot(tr);
5629 tracing_disarm_snapshot(tr);
5630 }
5631
5632 if (!had_max_tr && tracer_uses_snapshot(trace)) {
5633 ret = tracing_arm_snapshot_locked(tr);
5634 if (ret)
5635 return ret;
5636 }
5637
5638 tr->current_trace_flags = t->flags ? : t->tracer->flags;
5639
5640 if (trace->init) {
5641 ret = tracer_init(trace, tr);
5642 if (ret) {
5643 if (tracer_uses_snapshot(trace))
5644 tracing_disarm_snapshot(tr);
5645 tr->current_trace_flags = nop_trace.flags;
5646 return ret;
5647 }
5648 }
5649
5650 tr->current_trace = trace;
5651 tr->current_trace->enabled++;
5652 trace_branch_enable(tr);
5653
5654 return 0;
5655 }
5656
5657 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5658 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5659 size_t cnt, loff_t *ppos)
5660 {
5661 struct trace_array *tr = filp->private_data;
5662 char buf[MAX_TRACER_SIZE+1];
5663 char *name;
5664 size_t ret;
5665 int err;
5666
5667 ret = cnt;
5668
5669 if (cnt > MAX_TRACER_SIZE)
5670 cnt = MAX_TRACER_SIZE;
5671
5672 if (copy_from_user(buf, ubuf, cnt))
5673 return -EFAULT;
5674
5675 buf[cnt] = 0;
5676
5677 name = strim(buf);
5678
5679 err = tracing_set_tracer(tr, name);
5680 if (err)
5681 return err;
5682
5683 *ppos += ret;
5684
5685 return ret;
5686 }
5687
/*
 * Copy a nanosecond value out to user space, formatted in microseconds.
 * The sentinel (unsigned long)-1 is reported literally as "-1".
 */
static ssize_t
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n",
		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
5701
/*
 * Parse a decimal value from user space and store it in *@ptr scaled by
 * 1000 (microseconds in, nanoseconds stored -- matching the _read side).
 */
static ssize_t
tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* NOTE(review): no overflow check on val * 1000 -- confirm callers. */
	*ptr = val * 1000;

	return cnt;
}
5717
/* Read handler for "tracing_thresh" (value shown in usecs). */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
5724
/*
 * Write handler for "tracing_thresh": store the new threshold and give
 * the current tracer a chance to react via its update_thresh hook.
 */
static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	guard(mutex)(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		return ret;

	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			return ret;
	}

	return cnt;
}
5745
#ifdef CONFIG_TRACER_MAX_TRACE

/* Read handler for "tracing_max_latency" (per-instance, in usecs). */
static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}

/* Write handler for "tracing_max_latency". */
static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}

#endif
5767
open_pipe_on_cpu(struct trace_array * tr,int cpu)5768 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5769 {
5770 if (cpu == RING_BUFFER_ALL_CPUS) {
5771 if (cpumask_empty(tr->pipe_cpumask)) {
5772 cpumask_setall(tr->pipe_cpumask);
5773 return 0;
5774 }
5775 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5776 cpumask_set_cpu(cpu, tr->pipe_cpumask);
5777 return 0;
5778 }
5779 return -EBUSY;
5780 }
5781
close_pipe_on_cpu(struct trace_array * tr,int cpu)5782 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5783 {
5784 if (cpu == RING_BUFFER_ALL_CPUS) {
5785 WARN_ON(!cpumask_full(tr->pipe_cpumask));
5786 cpumask_clear(tr->pipe_cpumask);
5787 } else {
5788 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5789 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5790 }
5791 }
5792
tracing_open_pipe(struct inode * inode,struct file * filp)5793 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5794 {
5795 struct trace_array *tr = inode->i_private;
5796 struct trace_iterator *iter;
5797 int cpu;
5798 int ret;
5799
5800 ret = tracing_check_open_get_tr(tr);
5801 if (ret)
5802 return ret;
5803
5804 guard(mutex)(&trace_types_lock);
5805 cpu = tracing_get_cpu(inode);
5806 ret = open_pipe_on_cpu(tr, cpu);
5807 if (ret)
5808 goto fail_pipe_on_cpu;
5809
5810 /* create a buffer to store the information to pass to userspace */
5811 iter = kzalloc_obj(*iter);
5812 if (!iter) {
5813 ret = -ENOMEM;
5814 goto fail_alloc_iter;
5815 }
5816
5817 trace_seq_init(&iter->seq);
5818 iter->trace = tr->current_trace;
5819
5820 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5821 ret = -ENOMEM;
5822 goto fail;
5823 }
5824
5825 /* trace pipe does not show start of buffer */
5826 cpumask_setall(iter->started);
5827
5828 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
5829 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5830
5831 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5832 if (trace_clocks[tr->clock_id].in_ns)
5833 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5834
5835 iter->tr = tr;
5836 iter->array_buffer = &tr->array_buffer;
5837 iter->cpu_file = cpu;
5838 mutex_init(&iter->mutex);
5839 filp->private_data = iter;
5840
5841 if (iter->trace->pipe_open)
5842 iter->trace->pipe_open(iter);
5843
5844 nonseekable_open(inode, filp);
5845
5846 tr->trace_ref++;
5847
5848 return ret;
5849
5850 fail:
5851 kfree(iter);
5852 fail_alloc_iter:
5853 close_pipe_on_cpu(tr, cpu);
5854 fail_pipe_on_cpu:
5855 __trace_array_put(tr);
5856 return ret;
5857 }
5858
tracing_release_pipe(struct inode * inode,struct file * file)5859 static int tracing_release_pipe(struct inode *inode, struct file *file)
5860 {
5861 struct trace_iterator *iter = file->private_data;
5862 struct trace_array *tr = inode->i_private;
5863
5864 scoped_guard(mutex, &trace_types_lock) {
5865 tr->trace_ref--;
5866
5867 if (iter->trace->pipe_close)
5868 iter->trace->pipe_close(iter);
5869 close_pipe_on_cpu(tr, iter->cpu_file);
5870 }
5871
5872 free_trace_iter_content(iter);
5873 kfree(iter);
5874
5875 trace_array_put(tr);
5876
5877 return 0;
5878 }
5879
/*
 * Common poll implementation for trace readers.  Static (non-consuming)
 * iterators and block-mode instances are always readable; otherwise
 * defer to the ring buffer's watermark-based poll.
 */
static __poll_t
trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
{
	struct trace_array *tr = iter->tr;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return EPOLLIN | EPOLLRDNORM;

	if (tr->trace_flags & TRACE_ITER(BLOCK))
		/*
		 * Always select as readable when in blocking mode
		 */
		return EPOLLIN | EPOLLRDNORM;
	else
		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
					     filp, poll_table, iter->tr->buffer_percent);
}
5898
/* Poll handler for "trace_pipe": delegate to the shared trace_poll(). */
static __poll_t
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	return trace_poll(iter, filp, poll_table);
}
5906
/*
 * Block until the pipe has data.  Returns 1 when data is (probably)
 * available, 0/negative to stop reading (EOF or error, including
 * -EAGAIN for non-blocking readers).
 *
 * Must be called with iter->mutex held; it is dropped around the wait.
 */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
5943
update_last_data_if_empty(struct trace_array * tr)5944 static bool update_last_data_if_empty(struct trace_array *tr)
5945 {
5946 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5947 return false;
5948
5949 if (!ring_buffer_empty(tr->array_buffer.buffer))
5950 return false;
5951
5952 /*
5953 * If the buffer contains the last boot data and all per-cpu
5954 * buffers are empty, reset it from the kernel side.
5955 */
5956 update_last_data(tr);
5957 return true;
5958 }
5959
5960 /*
5961 * Consumer reader.
5962 */
5963 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5964 tracing_read_pipe(struct file *filp, char __user *ubuf,
5965 size_t cnt, loff_t *ppos)
5966 {
5967 struct trace_iterator *iter = filp->private_data;
5968 ssize_t sret;
5969
5970 /*
5971 * Avoid more than one consumer on a single file descriptor
5972 * This is just a matter of traces coherency, the ring buffer itself
5973 * is protected.
5974 */
5975 guard(mutex)(&iter->mutex);
5976
5977 /* return any leftover data */
5978 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5979 if (sret != -EBUSY)
5980 return sret;
5981
5982 trace_seq_init(&iter->seq);
5983
5984 if (iter->trace->read) {
5985 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5986 if (sret)
5987 return sret;
5988 }
5989
5990 waitagain:
5991 if (update_last_data_if_empty(iter->tr))
5992 return 0;
5993
5994 sret = tracing_wait_pipe(filp);
5995 if (sret <= 0)
5996 return sret;
5997
5998 /* stop when tracing is finished */
5999 if (trace_empty(iter))
6000 return 0;
6001
6002 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6003 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6004
6005 /* reset all but tr, trace, and overruns */
6006 trace_iterator_reset(iter);
6007 cpumask_clear(iter->started);
6008 trace_seq_init(&iter->seq);
6009
6010 trace_event_read_lock();
6011 trace_access_lock(iter->cpu_file);
6012 while (trace_find_next_entry_inc(iter) != NULL) {
6013 enum print_line_t ret;
6014 int save_len = iter->seq.seq.len;
6015
6016 ret = print_trace_line(iter);
6017 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6018 /*
6019 * If one print_trace_line() fills entire trace_seq in one shot,
6020 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6021 * In this case, we need to consume it, otherwise, loop will peek
6022 * this event next time, resulting in an infinite loop.
6023 */
6024 if (save_len == 0) {
6025 iter->seq.full = 0;
6026 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6027 trace_consume(iter);
6028 break;
6029 }
6030
6031 /* In other cases, don't print partial lines */
6032 iter->seq.seq.len = save_len;
6033 break;
6034 }
6035 if (ret != TRACE_TYPE_NO_CONSUME)
6036 trace_consume(iter);
6037
6038 if (trace_seq_used(&iter->seq) >= cnt)
6039 break;
6040
6041 /*
6042 * Setting the full flag means we reached the trace_seq buffer
6043 * size and we should leave by partial output condition above.
6044 * One of the trace_seq_* functions is not used properly.
6045 */
6046 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6047 iter->ent->type);
6048 }
6049 trace_access_unlock(iter->cpu_file);
6050 trace_event_read_unlock();
6051
6052 /* Now copy what we have to the user */
6053 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6054 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6055 trace_seq_init(&iter->seq);
6056
6057 /*
6058 * If there was nothing to send to user, in spite of consuming trace
6059 * entries, go back to wait for more entries.
6060 */
6061 if (sret == -EBUSY)
6062 goto waitagain;
6063
6064 return sret;
6065 }
6066
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6067 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6068 unsigned int idx)
6069 {
6070 __free_page(spd->pages[idx]);
6071 }
6072
/*
 * Format up to @rem bytes of trace lines into iter->seq for one splice
 * page, consuming entries as they are printed. Returns the number of
 * bytes still wanted; 0 means the request is satisfied or the buffer
 * ran out of entries (iter->ent is then NULL).
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		/* Remember the length so an oversized line can be undone */
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		/* Overflowed the page: roll back the partial line and stop */
		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Line exceeds what the caller asked for: undo and stop */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter)) {
			/* No more entries: tell the caller to stop */
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
6119
/*
 * splice_read() implementation for trace_pipe: formats consumed trace
 * entries into freshly allocated pages and hands them to @pipe.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages = pages_def,
		.partial = partial_def,
		.nr_pages = 0, /* This gets updated below. */
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.ops = &default_pipe_buf_ops,
		.spd_release = tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	/* One consumer at a time (same rule as tracing_read_pipe()) */
	mutex_lock(&iter->mutex);

	/* A tracer may supply its own splice implementation */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  min((size_t)trace_seq_used(&iter->seq),
					      (size_t)PAGE_SIZE));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = ret;

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	/* Only splice if at least one page was filled */
	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
6206
6207 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6208 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6209 size_t cnt, loff_t *ppos)
6210 {
6211 struct inode *inode = file_inode(filp);
6212 struct trace_array *tr = inode->i_private;
6213 char buf[64];
6214 int r;
6215
6216 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6217
6218 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6219 }
6220
6221 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6222 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6223 size_t cnt, loff_t *ppos)
6224 {
6225 struct inode *inode = file_inode(filp);
6226 struct trace_array *tr = inode->i_private;
6227 unsigned long val;
6228 int ret;
6229
6230 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6231 if (ret)
6232 return ret;
6233
6234 if (val > SYSCALL_FAULT_USER_MAX)
6235 val = SYSCALL_FAULT_USER_MAX;
6236
6237 tr->syscall_buf_sz = val;
6238
6239 *ppos += cnt;
6240
6241 return cnt;
6242 }
6243
6244 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6245 tracing_entries_read(struct file *filp, char __user *ubuf,
6246 size_t cnt, loff_t *ppos)
6247 {
6248 struct inode *inode = file_inode(filp);
6249 struct trace_array *tr = inode->i_private;
6250 int cpu = tracing_get_cpu(inode);
6251 char buf[64];
6252 int r = 0;
6253 ssize_t ret;
6254
6255 mutex_lock(&trace_types_lock);
6256
6257 if (cpu == RING_BUFFER_ALL_CPUS) {
6258 int cpu, buf_size_same;
6259 unsigned long size;
6260
6261 size = 0;
6262 buf_size_same = 1;
6263 /* check if all cpu sizes are same */
6264 for_each_tracing_cpu(cpu) {
6265 /* fill in the size from first enabled cpu */
6266 if (size == 0)
6267 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6268 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6269 buf_size_same = 0;
6270 break;
6271 }
6272 }
6273
6274 if (buf_size_same) {
6275 if (!tr->ring_buffer_expanded)
6276 r = sprintf(buf, "%lu (expanded: %lu)\n",
6277 size >> 10,
6278 trace_buf_size >> 10);
6279 else
6280 r = sprintf(buf, "%lu\n", size >> 10);
6281 } else
6282 r = sprintf(buf, "X\n");
6283 } else
6284 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6285
6286 mutex_unlock(&trace_types_lock);
6287
6288 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6289 return ret;
6290 }
6291
6292 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6293 tracing_entries_write(struct file *filp, const char __user *ubuf,
6294 size_t cnt, loff_t *ppos)
6295 {
6296 struct inode *inode = file_inode(filp);
6297 struct trace_array *tr = inode->i_private;
6298 unsigned long val;
6299 int ret;
6300
6301 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6302 if (ret)
6303 return ret;
6304
6305 /* must have at least 1 entry */
6306 if (!val)
6307 return -EINVAL;
6308
6309 /* value is in KB */
6310 val <<= 10;
6311 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6312 if (ret < 0)
6313 return ret;
6314
6315 *ppos += cnt;
6316
6317 return cnt;
6318 }
6319
6320 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6321 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6322 size_t cnt, loff_t *ppos)
6323 {
6324 struct trace_array *tr = filp->private_data;
6325 char buf[64];
6326 int r, cpu;
6327 unsigned long size = 0, expanded_size = 0;
6328
6329 mutex_lock(&trace_types_lock);
6330 for_each_tracing_cpu(cpu) {
6331 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6332 if (!tr->ring_buffer_expanded)
6333 expanded_size += trace_buf_size >> 10;
6334 }
6335 if (tr->ring_buffer_expanded)
6336 r = sprintf(buf, "%lu\n", size);
6337 else
6338 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6339 mutex_unlock(&trace_types_lock);
6340
6341 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6342 }
6343
/* Sentinel seq_file token representing the last_boot_info header line */
#define LAST_BOOT_HEADER ((void *)1)

/*
 * seq_file .next for last_boot_info: position 0 is the header, positions
 * 1..nr_entries map to the saved module address entries in the scratch
 * area from the previous boot.
 */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int index = *pos;

	(*pos)++;

	/* The first iteration emits the header line */
	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* *pos 0 is for the header, 1 is for the first module */
	index--;

	if (index >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[index];
}
6369
/* Hold scratch_mutex for the whole iteration; released in l_stop() */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&scratch_mutex);

	return l_next(m, NULL, pos);
}
6376
/* Counterpart to l_start(): drop the scratch area lock */
static void l_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&scratch_mutex);
}
6381
show_last_boot_header(struct seq_file * m,struct trace_array * tr)6382 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
6383 {
6384 struct trace_scratch *tscratch = tr->scratch;
6385
6386 /*
6387 * Do not leak KASLR address. This only shows the KASLR address of
6388 * the last boot. When the ring buffer is started, the LAST_BOOT
6389 * flag gets cleared, and this should only report "current".
6390 * Otherwise it shows the KASLR address from the previous boot which
6391 * should not be the same as the current boot.
6392 */
6393 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6394 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
6395 else
6396 seq_puts(m, "# Current\n");
6397 }
6398
l_show(struct seq_file * m,void * v)6399 static int l_show(struct seq_file *m, void *v)
6400 {
6401 struct trace_array *tr = m->private;
6402 struct trace_mod_entry *entry = v;
6403
6404 if (v == LAST_BOOT_HEADER) {
6405 show_last_boot_header(m, tr);
6406 return 0;
6407 }
6408
6409 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
6410 return 0;
6411 }
6412
/* seq_file iteration over the last-boot header and saved module offsets */
static const struct seq_operations last_boot_seq_ops = {
	.start		= l_start,
	.next		= l_next,
	.stop		= l_stop,
	.show		= l_show,
};
6419
tracing_last_boot_open(struct inode * inode,struct file * file)6420 static int tracing_last_boot_open(struct inode *inode, struct file *file)
6421 {
6422 struct trace_array *tr = inode->i_private;
6423 struct seq_file *m;
6424 int ret;
6425
6426 ret = tracing_check_open_get_tr(tr);
6427 if (ret)
6428 return ret;
6429
6430 ret = seq_open(file, &last_boot_seq_ops);
6431 if (ret) {
6432 trace_array_put(tr);
6433 return ret;
6434 }
6435
6436 m = file->private_data;
6437 m->private = tr;
6438
6439 return 0;
6440 }
6441
tracing_buffer_meta_open(struct inode * inode,struct file * filp)6442 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6443 {
6444 struct trace_array *tr = inode->i_private;
6445 int cpu = tracing_get_cpu(inode);
6446 int ret;
6447
6448 ret = tracing_check_open_get_tr(tr);
6449 if (ret)
6450 return ret;
6451
6452 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6453 if (ret < 0)
6454 __trace_array_put(tr);
6455 return ret;
6456 }
6457
6458 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6459 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6460 size_t cnt, loff_t *ppos)
6461 {
6462 /*
6463 * There is no need to read what the user has written, this function
6464 * is just to make sure that there is no error when "echo" is used
6465 */
6466
6467 *ppos += cnt;
6468
6469 return cnt;
6470 }
6471
/*
 * Closing free_buffer shrinks the ring buffer to zero (and optionally
 * stops tracing first, if the stop-on-free option is set).
 */
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	/* Drop the reference taken at open time */
	trace_array_put(tr);

	return 0;
}
6487
/* Maximum number of bytes accepted by one trace_marker write */
#define TRACE_MARKER_MAX_SIZE 4096

/*
 * Record @cnt bytes of @buf (already copied into kernel memory) into
 * @tr's ring buffer as a TRACE_PRINT event, running any triggers
 * attached to the trace_marker event file. Returns the number of bytes
 * recorded or a negative error.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Terminate with a newline (added if the user did not supply one) */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	/* Triggers that need to run after commit (e.g. snapshot) */
	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
6551
/* Per-CPU scratch buffer used to copy marker data in from user space */
struct trace_user_buf {
	char *buf;
};

/* Protects allocation and ref counting of trace_user_buffer */
static DEFINE_MUTEX(trace_user_buffer_mutex);
/* Shared descriptor for trace_marker writers; NULL until first open */
static struct trace_user_buf_info *trace_user_buffer;
6558
/**
 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
 * @tinfo: The descriptor to free up
 *
 * Frees any data allocated in the trace info descriptor.
 */
void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
	char *buf;
	int cpu;

	/* Nothing to do if the per-CPU array was never allocated */
	if (!tinfo || !tinfo->tbuf)
		return;

	for_each_possible_cpu(cpu) {
		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		kfree(buf);
	}
	free_percpu(tinfo->tbuf);
	/*
	 * NOTE(review): tinfo->tbuf is left dangling here; callers must
	 * not reuse @tinfo without re-initializing it — confirm.
	 */
}
6579
/*
 * Allocate the per-CPU buffers of @tinfo (each @size bytes) and set the
 * initial ref count. On partial allocation failure the already-allocated
 * buffers are left for the caller to clean up via
 * trace_user_fault_destroy() / user_buffer_free().
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	char *buf;
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/* Clear each buffer in case of error */
	for_each_possible_cpu(cpu) {
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
	}

	for_each_possible_cpu(cpu) {
		/* Allocate on the memory node of the CPU that will use it */
		buf = kmalloc_node(size, GFP_KERNEL,
				   cpu_to_node(cpu));
		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}
6609
/* For internal use. Free and reinitialize (sets *tinfo back to NULL) */
static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
	lockdep_assert_held(&trace_user_buffer_mutex);

	trace_user_fault_destroy(*tinfo);
	kfree(*tinfo);
	*tinfo = NULL;
}
6619
/* For internal use. Initialize and allocate */
static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
{
	bool alloc = false;
	int ret;

	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Allocate the descriptor itself when the caller has none yet */
	if (!*tinfo) {
		alloc = true;
		*tinfo = kzalloc_obj(**tinfo);
		if (!*tinfo)
			return -ENOMEM;
	}

	ret = user_fault_buffer_enable(*tinfo, size);
	/* Only unwind the descriptor if it was allocated here */
	if (ret < 0 && alloc)
		user_buffer_free(tinfo);

	return ret;
}
6641
/* For internal use, dereference and free if necessary */
static void user_buffer_put(struct trace_user_buf_info **tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	/* Guard against an unbalanced put */
	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
		return;

	/* Free the buffers when the last reference is dropped */
	if (--(*tinfo)->ref)
		return;

	user_buffer_free(tinfo);
}
6655
/**
 * trace_user_fault_init - Allocated or reference a per CPU buffer
 * @tinfo: A pointer to the trace buffer descriptor
 * @size: The size to allocate each per CPU buffer
 *
 * Create a per CPU buffer that can be used to copy from user space
 * in a task context. When calling trace_user_fault_read(), preemption
 * must be disabled, and it will enable preemption and copy user
 * space data to the buffer. If any schedule switches occur, it will
 * retry until it succeeds without a schedule switch knowing the buffer
 * is still valid.
 *
 * Returns 0 on success, negative on failure.
 */
int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
{
	int ret;

	if (!tinfo)
		return -EINVAL;

	guard(mutex)(&trace_user_buffer_mutex);

	/*
	 * NOTE(review): &tinfo is the address of the local parameter, so
	 * user_buffer_init() always sees a non-NULL *tinfo here and never
	 * allocates a new descriptor — presumably intentional; confirm.
	 */
	ret = user_buffer_init(&tinfo, size);
	if (ret < 0)
		/* Release any partially allocated per-CPU buffers */
		trace_user_fault_destroy(tinfo);

	return ret;
}
6685
/**
 * trace_user_fault_get - up the ref count for the user buffer
 * @tinfo: A pointer to the trace buffer descriptor
 *
 * Ups the ref count of the trace buffer.
 *
 * Returns the new ref count, or -1 if @tinfo is NULL.
 */
int trace_user_fault_get(struct trace_user_buf_info *tinfo)
{
	if (!tinfo)
		return -1;

	guard(mutex)(&trace_user_buffer_mutex);

	tinfo->ref++;
	return tinfo->ref;
}
6704
/**
 * trace_user_fault_put - dereference a per cpu trace buffer
 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
 *
 * Decrement the ref count of @tinfo.
 *
 * Returns the new refcount (negative on error). Note that unlike
 * user_buffer_put(), this does not free the buffers when the count
 * reaches zero.
 */
int trace_user_fault_put(struct trace_user_buf_info *tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	/* Guard against an unbalanced put */
	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
		return -1;

	--tinfo->ref;
	return tinfo->ref;
}
6723
/**
 * trace_user_fault_read - Read user space into a per CPU buffer
 * @tinfo: The @tinfo allocated by trace_user_fault_get()
 * @ptr: The user space pointer to read
 * @size: The size of user space to read.
 * @copy_func: Optional function to use to copy from user space
 * @data: Data to pass to copy_func if it was supplied
 *
 * Preemption must be disabled when this is called, and must not
 * be enabled while using the returned buffer.
 * This does the copying from user space into a per CPU buffer.
 *
 * The @size must not be greater than the size passed in to
 * trace_user_fault_init().
 *
 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
 * otherwise it will call @copy_func. It will call @copy_func with:
 *
 *   buffer: the per CPU buffer of the @tinfo.
 *   ptr: The pointer @ptr to user space to read
 *   size: The @size of the ptr to read
 *   data: The @data parameter
 *
 * It is expected that @copy_func will return 0 on success and non zero
 * if there was a fault.
 *
 * Returns a pointer to the buffer with the content read from @ptr.
 * Preemption must remain disabled while the caller accesses the
 * buffer returned by this function.
 * Returns NULL if there was a fault, or the size passed in is
 * greater than the size passed to trace_user_fault_init().
 */
char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
			    const char __user *ptr, size_t size,
			    trace_user_buf_copy copy_func, void *data)
{
	int cpu = smp_processor_id();
	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
	unsigned int cnt;
	int trys = 0;
	int ret;

	lockdep_assert_preemption_disabled();

	/*
	 * It's up to the caller to not try to copy more than it said
	 * it would.
	 */
	if (size > tinfo->size)
		return NULL;

	/*
	 * This acts similar to a seqcount. The per CPU context switches are
	 * recorded, migration is disabled and preemption is enabled. The
	 * read of the user space memory is copied into the per CPU buffer.
	 * Preemption is disabled again, and if the per CPU context switches count
	 * is still the same, it means the buffer has not been corrupted.
	 * If the count is different, it is assumed the buffer is corrupted
	 * and reading must be tried again.
	 */

	do {
		/*
		 * It is possible that something is trying to migrate this
		 * task. What happens then, is when preemption is enabled,
		 * the migration thread will preempt this task, try to
		 * migrate it, fail, then let it run again. That will
		 * cause this to loop again and never succeed.
		 * On failures, enabled and disable preemption with
		 * migration enabled, to allow the migration thread to
		 * migrate this task.
		 */
		if (trys) {
			/* Re-read the CPU; a retry may run somewhere else */
			preempt_enable_notrace();
			preempt_disable_notrace();
			cpu = smp_processor_id();
			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		}

		/*
		 * If for some reason, copy_from_user() always causes a context
		 * switch, this would then cause an infinite loop.
		 * If this task is preempted by another user space task, it
		 * will cause this task to try again. But just in case something
		 * changes where the copying from user space causes another task
		 * to run, prevent this from going into an infinite loop.
		 * 100 tries should be plenty.
		 */
		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
			return NULL;

		/* Read the current CPU context switch counter */
		cnt = nr_context_switches_cpu(cpu);

		/*
		 * Preemption is going to be enabled, but this task must
		 * remain on this CPU.
		 */
		migrate_disable();

		/*
		 * Now preemption is being enabled and another task can come in
		 * and use the same buffer and corrupt our data.
		 */
		preempt_enable_notrace();

		/* Make sure preemption is enabled here */
		lockdep_assert_preemption_enabled();

		/* The actual copy, which may fault and sleep */
		if (copy_func) {
			ret = copy_func(buffer, ptr, size, data);
		} else {
			ret = __copy_from_user(buffer, ptr, size);
		}

		preempt_disable_notrace();
		migrate_enable();

		/* if it faulted, no need to test if the buffer was corrupted */
		if (ret)
			return NULL;

		/*
		 * Preemption is disabled again, now check the per CPU context
		 * switch counter. If it doesn't match, then another user space
		 * process may have schedule in and corrupted our buffer. In that
		 * case the copying must be retried.
		 */
	} while (nr_context_switches_cpu(cpu) != cnt);

	return buffer;
}
6856
/*
 * write() handler for trace_marker: copy the user string into the
 * per-CPU fault buffer, then record it in the instance's ring buffer
 * (or in every instance sharing the global marker list).
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
		   size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	/* Markers may be disabled per instance via trace options */
	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* Reject sizes that would overflow the signed return value */
	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Oversized writes are silently truncated */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}
6902
/*
 * Record @cnt bytes of binary marker data (tag id followed by payload,
 * already in kernel memory) into @tr's ring buffer as a TRACE_RAW_DATA
 * event. Returns bytes written or a negative error.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/* cnt includes both the entry->id and the data behind it. */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	/* Unlike trace_marker, raw writes are all-or-nothing */
	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
6937
/*
 * write() handler for trace_marker_raw: like tracing_mark_write() but
 * the payload is binary (a tag id followed by opaque data) and is
 * never truncated.
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
		       size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	/* Markers may be disabled per instance via trace options */
	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}
6981
/*
 * Opening trace_marker takes a reference on the shared per-CPU
 * user-copy buffer, allocating it on first open.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		if (!trace_user_buffer) {
			/* First opener allocates the buffer with ref == 1 */
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	if (ret < 0)
		/* Drop the ref taken above (frees the buffer on last put) */
		user_buffer_put(&trace_user_buffer);
	return ret;
}
7002
/* Drop the user-copy buffer reference taken in tracing_mark_open() */
static int tracing_mark_release(struct inode *inode, struct file *file)
{
	user_buffer_put(&trace_user_buffer);
	return tracing_release_generic_tr(inode, file);
}
7008
tracing_clock_show(struct seq_file * m,void * v)7009 static int tracing_clock_show(struct seq_file *m, void *v)
7010 {
7011 struct trace_array *tr = m->private;
7012 int i;
7013
7014 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7015 seq_printf(m,
7016 "%s%s%s%s", i ? " " : "",
7017 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7018 i == tr->clock_id ? "]" : "");
7019 seq_putc(m, '\n');
7020
7021 return 0;
7022 }
7023
/*
 * tracing_set_clock - set the trace clock of an instance by name
 * @tr:		the trace array to update
 * @clockstr:	name of the clock; must match an entry in trace_clocks[]
 *
 * Changing clocks makes old timestamps incomparable, so both the main
 * and (if present) snapshot buffers are reset. Returns 0 on success,
 * -EINVAL if @clockstr is not a known clock name.
 */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	/* The reset may have emptied a last-boot buffer; clear it if so */
	update_last_data_if_empty(tr);

	/* Persist the clock choice in the boot-mapped scratch area */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}
7062
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7063 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7064 size_t cnt, loff_t *fpos)
7065 {
7066 struct seq_file *m = filp->private_data;
7067 struct trace_array *tr = m->private;
7068 char buf[64];
7069 const char *clockstr;
7070 int ret;
7071
7072 if (cnt >= sizeof(buf))
7073 return -EINVAL;
7074
7075 if (copy_from_user(buf, ubuf, cnt))
7076 return -EFAULT;
7077
7078 buf[cnt] = 0;
7079
7080 clockstr = strstrip(buf);
7081
7082 ret = tracing_set_clock(tr, clockstr);
7083 if (ret)
7084 return ret;
7085
7086 *fpos += cnt;
7087
7088 return cnt;
7089 }
7090
tracing_clock_open(struct inode * inode,struct file * file)7091 static int tracing_clock_open(struct inode *inode, struct file *file)
7092 {
7093 struct trace_array *tr = inode->i_private;
7094 int ret;
7095
7096 ret = tracing_check_open_get_tr(tr);
7097 if (ret)
7098 return ret;
7099
7100 ret = single_open(file, tracing_clock_show, inode->i_private);
7101 if (ret < 0)
7102 trace_array_put(tr);
7103
7104 return ret;
7105 }
7106
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7107 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7108 {
7109 struct trace_array *tr = m->private;
7110
7111 guard(mutex)(&trace_types_lock);
7112
7113 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7114 seq_puts(m, "delta [absolute]\n");
7115 else
7116 seq_puts(m, "[delta] absolute\n");
7117
7118 return 0;
7119 }
7120
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7121 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7122 {
7123 struct trace_array *tr = inode->i_private;
7124 int ret;
7125
7126 ret = tracing_check_open_get_tr(tr);
7127 if (ret)
7128 return ret;
7129
7130 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7131 if (ret < 0)
7132 trace_array_put(tr);
7133
7134 return ret;
7135 }
7136
/*
 * Return the timestamp for @rbe.  A per-CPU buffered event has no timestamp
 * of its own yet, so use the ring buffer's current time for it; otherwise
 * ask the ring buffer for the event's recorded timestamp.
 */
u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
{
	if (rbe == this_cpu_read(trace_buffered_event))
		return ring_buffer_time_stamp(buffer);

	return ring_buffer_event_time_stamp(buffer, rbe);
}
7144
/* Per-open-file state for the per-cpu binary buffer files. */
struct ftrace_buffer_info {
	struct trace_iterator iter;
	void *spare;		/* sub-buffer page from ring_buffer_alloc_read_page() */
	unsigned int spare_cpu;	/* CPU the spare page was allocated for */
	unsigned int spare_size; /* sub-buffer size the spare was sized for */
	unsigned int read;	/* bytes of the spare already copied to user space */
};
7152
7153 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Open the "snapshot" file.  Readers get a full trace iterator over the
 * snapshot buffer; writers only get a stub seq_file carrying a minimal
 * iterator, enough for tracing_snapshot_write() to find tr and the cpu.
 */
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if (file->f_mode & FMODE_READ) {
		/* true => iterate the snapshot buffer, not the live buffer */
		iter = __tracing_open(inode, file, true);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
	} else {
		/* Writes still need the seq_file to hold the private data */
		ret = -ENOMEM;
		m = kzalloc_obj(*m);
		if (!m)
			goto out;
		iter = kzalloc_obj(*iter);
		if (!iter) {
			kfree(m);
			goto out;
		}
		ret = 0;

		iter->tr = tr;
		iter->array_buffer = &tr->snapshot_buffer;
		iter->cpu_file = tracing_get_cpu(inode);
		m->private = iter;
		file->private_data = m;
	}
out:
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7194
/* smp_call_function_single() callback: swap the calling CPU's buffer. */
static void tracing_swap_cpu_buffer(void *tr)
{
	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
}
7199
/*
 * Write handler for the "snapshot" file:
 *   0    - free the snapshot buffer (all-CPUs file only)
 *   1    - arm the snapshot and swap it with the live buffer
 *   else - clear the snapshot buffer contents
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers(tr);
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	guard(mutex)(&trace_types_lock);

	/* A tracer that uses the snapshot itself owns the buffer. */
	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/* So does an armed conditional snapshot (checked under max_lock). */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		return ret;

	switch (val) {
	case 0:
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
			return -EINVAL;
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
			return -EINVAL;
#endif
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
					&tr->array_buffer, iter->cpu_file);
		/*
		 * NOTE(review): a resize failure above is overwritten by the
		 * arm result below -- looks intentional, but worth confirming.
		 */
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;

		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
			local_irq_disable();
			update_max_tr(tr, current, smp_processor_id(), NULL);
			local_irq_enable();
		} else {
			/* Per-cpu swap must run on the target CPU. */
			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
						 (void *)tr, 1);
		}
		tracing_disarm_snapshot(tr);
		break;
	default:
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->snapshot_buffer);
			else
				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
		}
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}

	return ret;
}
7281
/*
 * Release the "snapshot" file.  Readers release the full iterator via
 * tracing_release(); write-only opens must free the stub seq_file and
 * iterator that tracing_snapshot_open() allocated.
 */
static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	int ret;

	ret = tracing_release(inode, file);

	if (file->f_mode & FMODE_READ)
		return ret;

	/* If write only, the seq_file is just a stub */
	if (m)
		kfree(m->private);
	kfree(m);

	return 0;
}
7299
7300 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7301 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7302 size_t count, loff_t *ppos);
7303 static int tracing_buffers_release(struct inode *inode, struct file *file);
7304 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7305 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7306
/*
 * Open the raw snapshot file: reuse the buffer-file open, then redirect
 * the iterator at the snapshot buffer.  Refused while the current tracer
 * manages the snapshot itself.
 */
static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
	struct ftrace_buffer_info *info;
	int ret;

	/* The following checks for tracefs lockdown */
	ret = tracing_buffers_open(inode, filp);
	if (ret < 0)
		return ret;

	info = filp->private_data;

	if (tracer_uses_snapshot(info->iter.trace)) {
		/* Undo the open above before failing. */
		tracing_buffers_release(inode, filp);
		return -EBUSY;
	}

	info->iter.snapshot = true;
	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;

	return ret;
}
7329
7330 #endif /* CONFIG_TRACER_SNAPSHOT */
7331
7332
/* "tracing_thresh": latency threshold for the latency tracers. */
static const struct file_operations tracing_thresh_fops = {
	.open = tracing_open_generic,
	.read = tracing_thresh_read,
	.write = tracing_thresh_write,
	.llseek = generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
/* "tracing_max_latency": max latency recorded by a latency tracer. */
static const struct file_operations tracing_max_lat_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_max_lat_read,
	.write = tracing_max_lat_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};
#endif
7349
/* "current_tracer": select the active tracer by name. */
static const struct file_operations set_tracer_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_set_trace_read,
	.write = tracing_set_trace_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* "trace_pipe": consuming, blocking read of the trace. */
static const struct file_operations tracing_pipe_fops = {
	.open = tracing_open_pipe,
	.poll = tracing_poll_pipe,
	.read = tracing_read_pipe,
	.splice_read = tracing_splice_read_pipe,
	.release = tracing_release_pipe,
};

/* "buffer_size_kb": per-cpu ring buffer size. */
static const struct file_operations tracing_entries_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_entries_read,
	.write = tracing_entries_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* Syscall trace buffer size control. */
static const struct file_operations tracing_syscall_buf_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_syscall_buf_read,
	.write = tracing_syscall_buf_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* "buffer_meta": ring buffer meta page info (seq_file based). */
static const struct file_operations tracing_buffer_meta_fops = {
	.open = tracing_buffer_meta_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_seq_release,
};

/* "buffer_total_size_kb": total size across all CPUs (read only). */
static const struct file_operations tracing_total_entries_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_total_entries_read,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};

/* "free_buffer": free the ring buffer on write/close. */
static const struct file_operations tracing_free_buffer_fops = {
	.open = tracing_open_generic_tr,
	.write = tracing_free_buffer_write,
	.release = tracing_free_buffer_release,
};

/* "trace_marker": user-space writes injected into the trace. */
static const struct file_operations tracing_mark_fops = {
	.open = tracing_mark_open,
	.write = tracing_mark_write,
	.release = tracing_mark_release,
};

/* "trace_marker_raw": binary variant of trace_marker. */
static const struct file_operations tracing_mark_raw_fops = {
	.open = tracing_mark_open,
	.write = tracing_mark_raw_write,
	.release = tracing_mark_release,
};
7413
/* "trace_clock": show/select the trace clock. */
static const struct file_operations trace_clock_fops = {
	.open = tracing_clock_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_single_release_tr,
	.write = tracing_clock_write,
};

/* "timestamp_mode": delta vs absolute timestamps (read only). */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open = tracing_time_stamp_mode_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_single_release_tr,
};

/* "last_boot_info": info about a previous boot's persistent buffer. */
static const struct file_operations last_boot_fops = {
	.open = tracing_last_boot_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = tracing_seq_release,
};
7435
#ifdef CONFIG_TRACER_SNAPSHOT
/* "snapshot": trigger/clear/read the snapshot buffer. */
static const struct file_operations snapshot_fops = {
	.open = tracing_snapshot_open,
	.read = seq_read,
	.write = tracing_snapshot_write,
	.llseek = tracing_lseek,
	.release = tracing_snapshot_release,
};

/* "snapshot_raw": per-cpu binary read of the snapshot buffer. */
static const struct file_operations snapshot_raw_fops = {
	.open = snapshot_raw_open,
	.read = tracing_buffers_read,
	.release = tracing_buffers_release,
	.splice_read = tracing_buffers_splice_read,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
7453
7454 /*
7455 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7456 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer containing the value to write
 * @cnt: The maximum number of bytes to write
7459 * @ppos: The current "file" position
7460 *
7461 * This function implements the write interface for a struct trace_min_max_param.
7462 * The filp->private_data must point to a trace_min_max_param structure that
7463 * defines where to write the value, the min and the max acceptable values,
7464 * and a lock to protect the write.
7465 */
7466 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7467 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7468 {
7469 struct trace_min_max_param *param = filp->private_data;
7470 u64 val;
7471 int err;
7472
7473 if (!param)
7474 return -EFAULT;
7475
7476 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7477 if (err)
7478 return err;
7479
7480 if (param->lock)
7481 mutex_lock(param->lock);
7482
7483 if (param->min && val < *param->min)
7484 err = -EINVAL;
7485
7486 if (param->max && val > *param->max)
7487 err = -EINVAL;
7488
7489 if (!err)
7490 *param->val = val;
7491
7492 if (param->lock)
7493 mutex_unlock(param->lock);
7494
7495 if (err)
7496 return err;
7497
7498 return cnt;
7499 }
7500
7501 /*
7502 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7503 * @filp: The active open file structure
7504 * @ubuf: The userspace provided buffer to read value into
7505 * @cnt: The maximum number of bytes to read
7506 * @ppos: The current "file" position
7507 *
7508 * This function implements the read interface for a struct trace_min_max_param.
7509 * The filp->private_data must point to a trace_min_max_param struct with valid
7510 * data.
7511 */
7512 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7513 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7514 {
7515 struct trace_min_max_param *param = filp->private_data;
7516 char buf[U64_STR_SIZE];
7517 int len;
7518 u64 val;
7519
7520 if (!param)
7521 return -EFAULT;
7522
7523 val = *param->val;
7524
7525 if (cnt > sizeof(buf))
7526 cnt = sizeof(buf);
7527
7528 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7529
7530 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7531 }
7532
/* Generic fops for bounds-checked u64 tracefs files (see trace_min_max_param). */
const struct file_operations trace_min_max_fops = {
	.open = tracing_open_generic,
	.read = trace_min_max_read,
	.write = trace_min_max_write,
};
7538
/* Max number of entries kept in tracing/error_log before recycling. */
#define TRACING_LOG_ERRS_MAX 8
/* Max length of the location string of a logged error. */
#define TRACING_LOG_LOC_MAX 128

#define CMD_PREFIX " Command: "

struct err_info {
	const char **errs; /* ptr to loc-specific array of err strings */
	u8 type; /* index into errs -> specific err string */
	u16 pos; /* caret position */
	u64 ts; /* timestamp of the error, from local_clock() */
};

/* One entry in a trace array's err_log list. */
struct tracing_log_err {
	struct list_head list;
	struct err_info info;
	char loc[TRACING_LOG_LOC_MAX]; /* err location */
	char *cmd; /* what caused err */
};

/* Protects every trace array's err_log list. */
static DEFINE_MUTEX(tracing_err_log_lock);
7559
alloc_tracing_log_err(int len)7560 static struct tracing_log_err *alloc_tracing_log_err(int len)
7561 {
7562 struct tracing_log_err *err;
7563
7564 err = kzalloc_obj(*err);
7565 if (!err)
7566 return ERR_PTR(-ENOMEM);
7567
7568 err->cmd = kzalloc(len, GFP_KERNEL);
7569 if (!err->cmd) {
7570 kfree(err);
7571 return ERR_PTR(-ENOMEM);
7572 }
7573
7574 return err;
7575 }
7576
/* Free an error-log entry and its command string. */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
7582
/*
 * Get an error-log entry for @tr: allocate a fresh one while under the
 * TRACING_LOG_ERRS_MAX cap, otherwise recycle the oldest entry, giving it
 * a new @len byte cmd buffer.  Returns ERR_PTR(-ENOMEM) on allocation
 * failure.  Called with tracing_err_log_lock held.
 */
static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
						   int len)
{
	struct tracing_log_err *err;
	char *cmd;

	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
		err = alloc_tracing_log_err(len);
		/* Only count the entry if the allocation succeeded. */
		if (PTR_ERR(err) != -ENOMEM)
			tr->n_err_log_entries++;

		return err;
	}
	/* At the cap: reuse the oldest entry with a freshly sized cmd. */
	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return ERR_PTR(-ENOMEM);
	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
	kfree(err->cmd);
	err->cmd = cmd;
	list_del(&err->list);

	return err;
}
7606
7607 /**
7608 * err_pos - find the position of a string within a command for error careting
7609 * @cmd: The tracing command that caused the error
7610 * @str: The string to position the caret at within @cmd
7611 *
7612 * Finds the position of the first occurrence of @str within @cmd. The
7613 * return value can be passed to tracing_log_err() for caret placement
7614 * within @cmd.
7615 *
7616 * Returns the index within @cmd of the first occurrence of @str or 0
7617 * if @str was not found.
7618 */
unsigned int err_pos(char *cmd, const char *str)
{
	const char *hit;

	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);

	return hit ? hit - cmd : 0;
}
7632
7633 /**
7634 * tracing_log_err - write an error to the tracing error log
7635 * @tr: The associated trace array for the error (NULL for top level array)
7636 * @loc: A string describing where the error occurred
7637 * @cmd: The tracing command that caused the error
7638 * @errs: The array of loc-specific static error strings
7639 * @type: The index into errs[], which produces the specific static err string
7640 * @pos: The position the caret should be placed in the cmd
7641 *
7642 * Writes an error into tracing/error_log of the form:
7643 *
7644 * <loc>: error: <text>
7645 * Command: <cmd>
7646 * ^
7647 *
7648 * tracing/error_log is a small log file containing the last
7649 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7650 * unless there has been a tracing error, and the error log can be
7651 * cleared and have its memory freed by writing the empty string in
7652 * truncation mode to it i.e. echo > tracing/error_log.
7653 *
7654 * NOTE: the @errs array along with the @type param are used to
7655 * produce a static error string - this string is not copied and saved
7656 * when the error is logged - only a pointer to it is saved. See
7657 * existing callers for examples of how static strings are typically
7658 * defined for use with tracing_log_err().
7659 */
void tracing_log_err(struct trace_array *tr,
		     const char *loc, const char *cmd,
		     const char **errs, u8 type, u16 pos)
{
	struct tracing_log_err *err;
	int len = 0;

	if (!tr)
		tr = &global_trace;

	/* Room for "\n Command: <cmd>\n" plus terminating NULs. */
	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;

	guard(mutex)(&tracing_err_log_lock);

	err = get_tracing_log_err(tr, len);
	if (PTR_ERR(err) == -ENOMEM)
		return;

	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);

	/* Only a pointer to the static err string array is saved (see above). */
	err->info.errs = errs;
	err->info.type = type;
	err->info.pos = pos;
	err->info.ts = local_clock();

	list_add_tail(&err->list, &tr->err_log);
}
7688
clear_tracing_err_log(struct trace_array * tr)7689 static void clear_tracing_err_log(struct trace_array *tr)
7690 {
7691 struct tracing_log_err *err, *next;
7692
7693 guard(mutex)(&tracing_err_log_lock);
7694
7695 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7696 list_del(&err->list);
7697 free_tracing_log_err(err);
7698 }
7699
7700 tr->n_err_log_entries = 0;
7701 }
7702
/* seq_file start: takes tracing_err_log_lock, released in ->stop(). */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}
7711
/* seq_file next: advance to the next err_log entry. */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}
7718
/* seq_file stop: drops the lock taken in tracing_err_log_seq_start(). */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
7723
/* Emit the caret line: pad past CMD_PREFIX plus @pos columns, then "^". */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	seq_printf(m, "%*s", (int)(sizeof(CMD_PREFIX) - 1 + pos), "");
	seq_puts(m, "^\n");
}
7734
/* seq_file show: print one error as "[ts] <loc>: error: <text>\n Command: ...". */
static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* do_div() divides sec in place and returns the remainder. */
		nsec = do_div(sec, NSEC_PER_SEC);
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}
7753
/* seq_file iterator over a trace array's error log. */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start = tracing_err_log_seq_start,
	.next = tracing_err_log_seq_next,
	.stop = tracing_err_log_seq_stop,
	.show = tracing_err_log_seq_show
};
7760
/*
 * Open "error_log".  Opening with O_TRUNC for write clears the log;
 * opening for read sets up the seq_file iterator.
 */
static int tracing_err_log_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret = 0;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was opened for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
		clear_tracing_err_log(tr);

	if (file->f_mode & FMODE_READ) {
		ret = seq_open(file, &tracing_err_log_seq_ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = tr;
		} else {
			/* Drop the reference taken by tracing_check_open_get_tr(). */
			trace_array_put(tr);
		}
	}
	return ret;
}
7785
/*
 * Writes to "error_log" are accepted but ignored; clearing happens via
 * opening the file with O_TRUNC (see tracing_err_log_open()).
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7792
/* Release "error_log": drop the tr reference; readers also tear down seq_file. */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
7804
/* "error_log": last few tracing errors, cleared by truncating write open. */
static const struct file_operations tracing_err_log_fops = {
	.open = tracing_err_log_open,
	.write = tracing_err_log_write,
	.read = seq_read,
	.llseek = tracing_lseek,
	.release = tracing_err_log_release,
};
7812
/*
 * Open a per-cpu "trace_pipe_raw" style buffer file: allocate the
 * ftrace_buffer_info, point its iterator at this tr/cpu and bump the
 * buffer reference count.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr = tr;
	info->iter.cpu_file = tracing_get_cpu(inode);
	info->iter.trace = tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare = NULL;
	/* Force reading ring buffer for first read */
	info->read = (unsigned int)-1;

	filp->private_data = info;

	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7851
7852 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7853 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7854 {
7855 struct ftrace_buffer_info *info = filp->private_data;
7856 struct trace_iterator *iter = &info->iter;
7857
7858 return trace_poll(iter, filp, poll_table);
7859 }
7860
/*
 * Read raw sub-buffer pages from the per-cpu buffer file.  A "spare"
 * page is kept across reads so partially consumed sub-buffers can be
 * drained by subsequent reads.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	/* The snapshot buffer is off limits while a tracer is using it. */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing available: wait unless nonblocking or closed. */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	/* Copy out the unread remainder of the spare page. */
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* Nothing copied at all is a fault; a short copy still advances. */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
7949
/* flush(): mark the iterator closed and wake any blocked readers. */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
7963
/* Release the per-cpu buffer file: drop refs and free the spare page. */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	guard(mutex)(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	return 0;
}
7982
/* Refcounted reference to one ring-buffer read page, shared with pipe bufs. */
struct buffer_ref {
	struct trace_buffer *buffer;	/* buffer the page was taken from */
	void *page;			/* page from ring_buffer_alloc_read_page() */
	int cpu;			/* CPU the page belongs to */
	refcount_t refcount;
};
7989
buffer_ref_release(struct buffer_ref * ref)7990 static void buffer_ref_release(struct buffer_ref *ref)
7991 {
7992 if (!refcount_dec_and_test(&ref->refcount))
7993 return;
7994 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7995 kfree(ref);
7996 }
7997
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)7998 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7999 struct pipe_buffer *buf)
8000 {
8001 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8002
8003 buffer_ref_release(ref);
8004 buf->private = 0;
8005 }
8006
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8007 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8008 struct pipe_buffer *buf)
8009 {
8010 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8011
8012 if (refcount_read(&ref->refcount) > INT_MAX/2)
8013 return false;
8014
8015 refcount_inc(&ref->refcount);
8016 return true;
8017 }
8018
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release = buffer_pipe_buf_release,
	.get = buffer_pipe_buf_get,
};
8024
8025 /*
8026 * Callback from splice_to_pipe(), if we need to release some pages
8027 * at the end of the spd in case we error'ed out in filling the pipe.
8028 */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8029 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8030 {
8031 struct buffer_ref *ref =
8032 (struct buffer_ref *)spd->partial[i].private;
8033
8034 buffer_ref_release(ref);
8035 spd->partial[i].private = 0;
8036 }
8037
/*
 * Splice whole sub-buffer pages from the per-cpu buffer into a pipe
 * without copying: each page is wrapped in a refcounted buffer_ref and
 * handed to the pipe via buffer_pipe_buf_ops.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages_def,
		.partial = partial_def,
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.ops = &buffer_pipe_buf_ops,
		.spd_release = buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	/* Offsets and lengths must be sub-buffer aligned. */
	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	if (*ppos & (page_size - 1))
		return -EINVAL;

	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		/* full == 1: only take a completely filled sub-buffer. */
		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		if (ret)
			goto out;

		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
 out:
	splice_shrink_spd(&spd);

	return ret;
}
8158
/*
 * ioctl for the per-cpu buffer files:
 *   TRACE_MMAP_IOCTL_GET_READER - (optionally wait, then) update the
 *                                 mapped reader page for this cpu
 *   0                           - wake up all waiters on this buffer
 *   anything else               - -ENOTTY
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		if (!(file->f_flags & O_NONBLOCK)) {
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
8194
8195 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Account a new user-space mapping of the ring buffer.
 *
 * Mapping and snapshotting are mutually exclusive: fails with -EBUSY if a
 * snapshot is active or the mapping count would overflow.  Returns 0 on
 * success.
 */
static int get_snapshot_map(struct trace_array *tr)
{
	int err = 0;

	/*
	 * Called with mmap_lock held. lockdep would be unhappy if we would now
	 * take trace_types_lock. Instead use the specific
	 * snapshot_trigger_lock.
	 */
	spin_lock(&tr->snapshot_trigger_lock);

	if (tr->snapshot || tr->mapped == UINT_MAX)
		err = -EBUSY;
	else
		tr->mapped++;

	spin_unlock(&tr->snapshot_trigger_lock);

	/* Wait for update_max_tr() to observe iter->tr->mapped */
	if (tr->mapped == 1)
		synchronize_rcu();

	return err;

}
put_snapshot_map(struct trace_array * tr)8221 static void put_snapshot_map(struct trace_array *tr)
8222 {
8223 spin_lock(&tr->snapshot_trigger_lock);
8224 if (!WARN_ON(!tr->mapped))
8225 tr->mapped--;
8226 spin_unlock(&tr->snapshot_trigger_lock);
8227 }
8228 #else
/* Snapshots not configured: mapping needs no snapshot accounting */
static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
static inline void put_snapshot_map(struct trace_array *tr) { }
8231 #endif
8232
8233 /*
8234 * This is called when a VMA is duplicated (e.g., on fork()) to increment
8235 * the user_mapped counter without remapping pages.
8236 */
static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	/* Duplicate (take another reference on) the existing mapping */
	ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
}
8244
/* VMA close: unmap the ring buffer and drop the snapshot-map reference. */
static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
	put_snapshot_map(iter->tr);
}
8253
/* Always reject splitting a trace buffer VMA. */
static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}
8262
/* vm ops for user-space mappings of the per-CPU ring buffer files */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.open = tracing_buffers_mmap_open,
	.close = tracing_buffers_mmap_close,
	.may_split = tracing_buffers_may_split,
};
8268
/*
 * mmap() handler: map the complete ring buffer (including the meta page)
 * into user space.
 */
static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	int ret = 0;

	/* A memmap'ed and backup buffers are not supported for user space mmap */
	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
		return -ENODEV;

	/* Mapping excludes snapshot use; take the "mapped" reference first */
	ret = get_snapshot_map(iter->tr);
	if (ret)
		return ret;

	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
	if (ret)
		put_snapshot_map(iter->tr);

	/* On failure the mmap() is rejected and the vma discarded, so
	 * installing vm_ops unconditionally is harmless. */
	vma->vm_ops = &tracing_buffers_vmops;

	return ret;
}
8291
/* File operations for the per-CPU "trace_pipe_raw" buffer files */
static const struct file_operations tracing_buffers_fops = {
	.open = tracing_buffers_open,
	.read = tracing_buffers_read,
	.poll = tracing_buffers_poll,
	.release = tracing_buffers_release,
	.flush = tracing_buffers_flush,
	.splice_read = tracing_buffers_splice_read,
	.unlocked_ioctl = tracing_buffers_ioctl,
	.mmap = tracing_buffers_mmap,
};
8302
/*
 * Read handler for per_cpu/cpuN/stats: report this CPU's ring buffer
 * statistics (entries, overruns, bytes, timestamps, dropped/read events)
 * as formatted text.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	/* trace_seq holds a full page; allocate rather than use the stack */
	s = kmalloc_obj(*s);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
				 t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				 ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
8366
/* File operations for the per-CPU "stats" files */
static const struct file_operations tracing_stats_fops = {
	.open = tracing_open_generic_tr,
	.read = tracing_stats_read,
	.llseek = generic_file_llseek,
	.release = tracing_release_generic_tr,
};
8373
8374 #ifdef CONFIG_DYNAMIC_FTRACE
8375
8376 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8377 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8378 size_t cnt, loff_t *ppos)
8379 {
8380 ssize_t ret;
8381 char *buf;
8382 int r;
8383
8384 /* 512 should be plenty to hold the amount needed */
8385 #define DYN_INFO_BUF_SIZE 512
8386
8387 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8388 if (!buf)
8389 return -ENOMEM;
8390
8391 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8392 "%ld pages:%ld groups: %ld\n"
8393 "ftrace boot update time = %llu (ns)\n"
8394 "ftrace module total update time = %llu (ns)\n",
8395 ftrace_update_tot_cnt,
8396 ftrace_number_of_pages,
8397 ftrace_number_of_groups,
8398 ftrace_update_time,
8399 ftrace_total_mod_time);
8400
8401 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8402 kfree(buf);
8403 return ret;
8404 }
8405
/* File operations for the "dyn_ftrace_total_info" file */
static const struct file_operations tracing_dyn_info_fops = {
	.open = tracing_open_generic,
	.read = tracing_read_dyn_info,
	.llseek = generic_file_llseek,
};
8411 #endif /* CONFIG_DYNAMIC_FTRACE */
8412
8413 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* "snapshot" probe without a count: take a snapshot on every hit. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8421
/*
 * "snapshot:count" probe: take a snapshot on each hit while the per-ip
 * counter is positive, decrementing it; stop once it reaches zero.
 * Without a counter entry the snapshot is unlimited.
 */
static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
		      struct trace_array *tr, struct ftrace_probe_ops *ops,
		      void *data)
{
	struct ftrace_func_mapper *mapper = data;
	long *remaining;

	if (mapper) {
		remaining = (long *)ftrace_func_mapper_find_ip(mapper, ip);
		if (remaining) {
			/* Budget exhausted: no more snapshots for this ip */
			if (*remaining <= 0)
				return;
			(*remaining)--;
		}
	}

	tracing_snapshot_instance(tr);
}
8443
8444 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8445 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8446 struct ftrace_probe_ops *ops, void *data)
8447 {
8448 struct ftrace_func_mapper *mapper = data;
8449 long *count = NULL;
8450
8451 seq_printf(m, "%ps:", (void *)ip);
8452
8453 seq_puts(m, "snapshot");
8454
8455 if (mapper)
8456 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8457
8458 if (count)
8459 seq_printf(m, ":count=%ld\n", *count);
8460 else
8461 seq_puts(m, ":unlimited\n");
8462
8463 return 0;
8464 }
8465
8466 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8467 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8468 unsigned long ip, void *init_data, void **data)
8469 {
8470 struct ftrace_func_mapper *mapper = *data;
8471
8472 if (!mapper) {
8473 mapper = allocate_ftrace_func_mapper();
8474 if (!mapper)
8475 return -ENOMEM;
8476 *data = mapper;
8477 }
8478
8479 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8480 }
8481
8482 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8483 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8484 unsigned long ip, void *data)
8485 {
8486 struct ftrace_func_mapper *mapper = data;
8487
8488 if (!ip) {
8489 if (!mapper)
8490 return;
8491 free_ftrace_func_mapper(mapper, NULL);
8492 return;
8493 }
8494
8495 ftrace_func_mapper_remove_ip(mapper, ip);
8496 }
8497
/* Probe ops for "func:snapshot" (no count, nothing to allocate) */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func = ftrace_snapshot,
	.print = ftrace_snapshot_print,
};

/* Probe ops for "func:snapshot:count" (per-ip countdown via a mapper) */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func = ftrace_count_snapshot,
	.print = ftrace_snapshot_print,
	.init = ftrace_snapshot_init,
	.free = ftrace_snapshot_free,
};
8509
/*
 * Handle "func:snapshot[:count]" written to set_ftrace_filter.
 *
 * A leading '!' in @glob unregisters the probe and disarms the snapshot.
 * With a count parameter, only that many hits take snapshots; without,
 * every hit does.  Returns 0 on success or a negative errno.
 */
static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;
	char *number;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;

	if (glob[0] == '!') {
		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
		if (!ret)
			tracing_disarm_snapshot(tr);

		return ret;
	}

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	/* An empty count ("func:snapshot:") means unlimited */
	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	ret = tracing_arm_snapshot(tr);
	if (ret < 0)
		return ret;

	ret = register_ftrace_function_probe(glob, tr, ops, count);
	if (ret < 0)
		tracing_disarm_snapshot(tr);

	/* register returns the number of functions matched on success */
	return ret < 0 ? ret : 0;
}
8563
/* Registers the "snapshot" command used in set_ftrace_filter */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name = "snapshot",
	.func = ftrace_trace_snapshot_callback,
};

static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
8573 #else
/* No snapshot + dynamic ftrace support: nothing to register */
static inline __init int register_snapshot_cmd(void) { return 0; }
8575 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8576
tracing_get_dentry(struct trace_array * tr)8577 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8578 {
8579 /* Top directory uses NULL as the parent */
8580 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8581 return NULL;
8582
8583 if (WARN_ON(!tr->dir))
8584 return ERR_PTR(-ENODEV);
8585
8586 /* All sub buffers have a descriptor */
8587 return tr->dir;
8588 }
8589
/*
 * Return (creating on first use) the "per_cpu" directory of @tr.
 * Returns NULL on failure.
 */
static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	/* Cached after the first successful creation */
	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	/* Reports the failure; caller still sees the NULL return below */
	MEM_FAIL(!tr->percpu_dir,
		 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}
8608
8609 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8610 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8611 void *data, long cpu, const struct file_operations *fops)
8612 {
8613 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8614
8615 if (ret) /* See tracing_get_cpu() */
8616 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8617 return ret;
8618 }
8619
/*
 * Populate per_cpu/cpuN/ for @cpu: trace, trace_pipe, trace_pipe_raw,
 * stats, buffer_size_kb, and either buffer_meta (boot-mapped ranges) or
 * the snapshot files.  Failures are logged and otherwise ignored.
 */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
			      tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
			      tr, cpu, &tracing_entries_fops);

	/* Boot-mapped (range) buffers expose their meta data instead */
	if (tr->range_addr_start)
		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &tracing_buffer_meta_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->range_addr_start) {
		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
				      tr, cpu, &snapshot_fops);

		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &snapshot_raw_fops);
	}
#endif
}
8667
8668 #ifdef CONFIG_FTRACE_SELFTEST
8669 /* Let selftest have access to static functions in this file */
8670 #include "trace_selftest.c"
8671 #endif
8672
8673 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8674 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8675 loff_t *ppos)
8676 {
8677 struct trace_option_dentry *topt = filp->private_data;
8678 char *buf;
8679
8680 if (topt->flags->val & topt->opt->bit)
8681 buf = "1\n";
8682 else
8683 buf = "0\n";
8684
8685 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8686 }
8687
/*
 * Write '0' or '1' to a tracer-specific option file.  Only calls into
 * __set_tracer_option() when the bit actually changes state.
 */
static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	if (!!(topt->flags->val & topt->opt->bit) != val) {
		guard(mutex)(&trace_types_lock);
		/* The last argument is whether the option is being cleared */
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}
8715
tracing_open_options(struct inode * inode,struct file * filp)8716 static int tracing_open_options(struct inode *inode, struct file *filp)
8717 {
8718 struct trace_option_dentry *topt = inode->i_private;
8719 int ret;
8720
8721 ret = tracing_check_open_get_tr(topt->tr);
8722 if (ret)
8723 return ret;
8724
8725 filp->private_data = inode->i_private;
8726 return 0;
8727 }
8728
/* Drop the trace_array reference taken in tracing_open_options(). */
static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}
8736
/* File operations for tracer-specific option files under options/ */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_options,
};
8744
8745 /*
8746 * In order to pass in both the trace_array descriptor as well as the index
8747 * to the flag that the trace option file represents, the trace_array
8748 * has a character array of trace_flags_index[], which holds the index
8749 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8750 * The address of this character array is passed to the flag option file
8751 * read/write callbacks.
8752 *
8753 * In order to extract both the index and the trace_array descriptor,
8754 * get_tr_index() uses the following algorithm.
8755 *
8756 * idx = *ptr;
8757 *
8758 * As the pointer itself contains the address of the index (remember
8759 * index[1] == 1).
8760 *
8761 * Then to get the trace_array descriptor, by subtracting that index
8762 * from the ptr, we get to the start of the index itself.
8763 *
8764 * ptr - idx == &index[0]
8765 *
8766 * Then a simple container_of() from that pointer gets us to the
8767 * trace_array descriptor.
8768 */
/*
 * Decode a pointer into tr->trace_flags_index[] back into its owning
 * trace_array and the flag bit index (see the big comment above).
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	/* index[i] == i, so the byte pointed at IS the flag index */
	*pindex = *(unsigned char *)data;

	/* Step back to &index[0], then up to the enclosing trace_array.
	 * (void * arithmetic here is a GNU extension, standard in kernel C.) */
	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
8777
8778 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8779 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8780 loff_t *ppos)
8781 {
8782 void *tr_index = filp->private_data;
8783 struct trace_array *tr;
8784 unsigned int index;
8785 char *buf;
8786
8787 get_tr_index(tr_index, &tr, &index);
8788
8789 if (tr->trace_flags & (1ULL << index))
8790 buf = "1\n";
8791 else
8792 buf = "0\n";
8793
8794 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8795 }
8796
/*
 * Write '0' or '1' to a core trace option file, updating the instance's
 * flag bit through set_tracer_flag().
 */
static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Lock order: event_mutex outside trace_types_lock */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1ULL << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
8829
/* File operations for the core (non tracer-specific) option files */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
8836
/*
 * Thin wrapper around tracefs_create_file() that logs a warning when
 * creation fails.  Returns the new dentry, or NULL on failure.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry = tracefs_create_file(name, mode, parent, data, fops);

	if (!dentry)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return dentry;
}
8851
8852
trace_options_init_dentry(struct trace_array * tr)8853 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8854 {
8855 struct dentry *d_tracer;
8856
8857 if (tr->options)
8858 return tr->options;
8859
8860 d_tracer = tracing_get_dentry(tr);
8861 if (IS_ERR(d_tracer))
8862 return NULL;
8863
8864 tr->options = tracefs_create_dir("options", d_tracer);
8865 if (!tr->options) {
8866 pr_warn("Could not create tracefs directory 'options'\n");
8867 return NULL;
8868 }
8869
8870 return tr->options;
8871 }
8872
/*
 * Create a single tracer option file under options/ and fill in @topt
 * with the flag set, option, and trace_array it controls.  Silently
 * does nothing if the options directory cannot be created.
 */
static void
create_trace_option_file(struct trace_array *tr,
			 struct trace_option_dentry *topt,
			 struct tracer_flags *flags,
			 struct tracer_opt *opt)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	topt->flags = flags;
	topt->opt = opt;
	topt->tr = tr;

	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
					t_options, topt, &trace_options_fops);
}
8892
/*
 * Create one option file for every flag in @flags and record the
 * resulting topts array in tr->topts for later teardown.
 *
 * Returns 0 on success or when there is nothing to create.
 * NOTE(review): a failed topts allocation also returns 0 (options are
 * best effort); only the tr->topts bookkeeping failure is -ENOMEM —
 * confirm this asymmetry is intended.
 */
static int
create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
			  struct tracer_flags *flags)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_opt *opts;
	int cnt;

	if (!flags || !flags->opts)
		return 0;

	opts = flags->opts;

	/* Count the NULL-name terminated option list */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	/* One extra zeroed slot terminates the array */
	topts = kzalloc_objs(*topts, cnt + 1);
	if (!topts)
		return 0;

	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return -ENOMEM;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		MEM_FAIL(topts[cnt].entry == NULL,
			 "Failed to create trace option: %s",
			 opts[cnt].name);
	}
	return 0;
}
8935
get_global_flags_val(struct tracer * tracer)8936 static int get_global_flags_val(struct tracer *tracer)
8937 {
8938 struct tracers *t;
8939
8940 list_for_each_entry(t, &global_trace.tracers, list) {
8941 if (t->tracer != tracer)
8942 continue;
8943 if (!t->flags)
8944 return -1;
8945 return t->flags->val;
8946 }
8947 return -1;
8948 }
8949
add_tracer_options(struct trace_array * tr,struct tracers * t)8950 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
8951 {
8952 struct tracer *tracer = t->tracer;
8953 struct tracer_flags *flags = t->flags ?: tracer->flags;
8954
8955 if (!flags)
8956 return 0;
8957
8958 /* Only add tracer options after update_tracer_options finish */
8959 if (!tracer_options_updated)
8960 return 0;
8961
8962 return create_trace_option_files(tr, tracer, flags);
8963 }
8964
/*
 * Record @tracer in @tr's tracer list and create its option files.
 * Tracers that define default_flags get a private per-instance copy of
 * their flags; instances inherit the current values from global_trace.
 * Returns 0 on success or a negative errno.
 */
static int add_tracer(struct trace_array *tr, struct tracer *tracer)
{
	struct tracer_flags *flags;
	struct tracers *t;
	int ret;

	/* Only enable if the directory has been created already. */
	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
		return 0;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return 0;

	t = kmalloc_obj(*t);
	if (!t)
		return -ENOMEM;

	t->tracer = tracer;
	t->flags = NULL;
	list_add(&t->list, &tr->tracers);

	flags = tracer->flags;
	if (!flags) {
		if (!tracer->default_flags)
			return 0;

		/*
		 * If the tracer defines default flags, it means the flags are
		 * per trace instance.
		 */
		flags = kmalloc_obj(*flags);
		/* NOTE(review): on failure, t stays listed with NULL flags —
		 * confirm that is the intended state. */
		if (!flags)
			return -ENOMEM;

		*flags = *tracer->default_flags;
		flags->trace = tracer;

		t->flags = flags;

		/* If this is an instance, inherit the global_trace flags */
		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
			int val = get_global_flags_val(tracer);
			if (!WARN_ON_ONCE(val < 0))
				flags->val = val;
		}
	}

	ret = add_tracer_options(tr, t);
	if (ret < 0) {
		/* Option creation failed: unhook and free what we added */
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}

	return ret;
}
9025
9026 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9027 create_trace_option_core_file(struct trace_array *tr,
9028 const char *option, long index)
9029 {
9030 struct dentry *t_options;
9031
9032 t_options = trace_options_init_dentry(tr);
9033 if (!t_options)
9034 return NULL;
9035
9036 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9037 (void *)&tr->trace_flags_index[index],
9038 &trace_options_core_fops);
9039 }
9040
create_trace_options_dir(struct trace_array * tr)9041 static void create_trace_options_dir(struct trace_array *tr)
9042 {
9043 struct dentry *t_options;
9044 bool top_level = tr == &global_trace;
9045 int i;
9046
9047 t_options = trace_options_init_dentry(tr);
9048 if (!t_options)
9049 return;
9050
9051 for (i = 0; trace_options[i]; i++) {
9052 if (top_level ||
9053 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9054 create_trace_option_core_file(tr, trace_options[i], i);
9055 }
9056 }
9057 }
9058
9059 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9060 rb_simple_read(struct file *filp, char __user *ubuf,
9061 size_t cnt, loff_t *ppos)
9062 {
9063 struct trace_array *tr = filp->private_data;
9064 char buf[64];
9065 int r;
9066
9067 r = tracer_tracing_is_on(tr);
9068 r = sprintf(buf, "%d\n", r);
9069
9070 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9071 }
9072
/*
 * Write "tracing_on": nonzero enables recording (calling the tracer's
 * start hook), zero disables it (calling the stop hook) and wakes any
 * readers blocked on the buffer.
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		guard(mutex)(&trace_types_lock);
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
	}

	(*ppos)++;

	return cnt;
}
9107
/* File operations for the "tracing_on" file */
static const struct file_operations rb_simple_fops = {
	.open = tracing_open_generic_tr,
	.read = rb_simple_read,
	.write = rb_simple_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};
9115
9116 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9117 buffer_percent_read(struct file *filp, char __user *ubuf,
9118 size_t cnt, loff_t *ppos)
9119 {
9120 struct trace_array *tr = filp->private_data;
9121 char buf[64];
9122 int r;
9123
9124 r = tr->buffer_percent;
9125 r = sprintf(buf, "%d\n", r);
9126
9127 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9128 }
9129
9130 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9131 buffer_percent_write(struct file *filp, const char __user *ubuf,
9132 size_t cnt, loff_t *ppos)
9133 {
9134 struct trace_array *tr = filp->private_data;
9135 unsigned long val;
9136 int ret;
9137
9138 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9139 if (ret)
9140 return ret;
9141
9142 if (val > 100)
9143 return -EINVAL;
9144
9145 tr->buffer_percent = val;
9146
9147 (*ppos)++;
9148
9149 return cnt;
9150 }
9151
/* File operations for the "buffer_percent" file */
static const struct file_operations buffer_percent_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_percent_read,
	.write = buffer_percent_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};
9159
9160 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9161 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9162 {
9163 struct trace_array *tr = filp->private_data;
9164 size_t size;
9165 char buf[64];
9166 int order;
9167 int r;
9168
9169 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9170 size = (PAGE_SIZE << order) / 1024;
9171
9172 r = sprintf(buf, "%zd\n", size);
9173
9174 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9175 }
9176
/*
 * Write "buffer_subbuf_size_kb": set the sub-buffer size in KB, rounded
 * up to a power-of-two number of pages (order 0..7, i.e. 1..128 pages).
 * Tracing is stopped for the duration; if a snapshot buffer is allocated
 * it is resized to match, with a best-effort rollback of the main buffer
 * when that second resize fails.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	/* Keep the snapshot buffer at the same order as the main buffer */
	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
 out_max:
#endif
	(*ppos)++;
 out:
	/* On error, return the errno instead of the byte count */
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}
9248
/* tracefs "buffer_subbuf_size_kb" file operations. */
static const struct file_operations buffer_subbuf_size_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_subbuf_size_read,
	.write = buffer_subbuf_size_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};
9256
/* Parent tracefs directory for all trace instances ("instances/"). */
static struct dentry *trace_instance_dir;

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9261
#ifdef CONFIG_MODULES
/*
 * module_for_each_mod()/notifier callback: record the load-address
 * delta of @mod between this boot and the previous (persistent) boot,
 * so addresses from last-boot trace data can be translated.
 * @data is the trace_array owning the scratch area.  Always returns 0.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	struct trace_array *tr = data;
	int i;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	/* Find the scratch entry recorded for this module, if any. */
	for (i = 0; i < tscratch->nr_entries; i++) {
		entry = &tscratch->entries[i];
		if (strcmp(mod->name, entry->mod_name))
			continue;
		if (mod->state == MODULE_STATE_GOING)
			/* Module is unloading: the delta is no longer valid. */
			module_delta->delta[i] = 0;
		else
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		break;
	}
	return 0;
}
#else
/* Module support disabled: nothing to record. */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif
9292
mod_addr_comp(const void * a,const void * b,const void * data)9293 static int mod_addr_comp(const void *a, const void *b, const void *data)
9294 {
9295 const struct trace_mod_entry *e1 = a;
9296 const struct trace_mod_entry *e2 = b;
9297
9298 return e1->mod_addr > e2->mod_addr ? 1 : -1;
9299 }
9300
setup_trace_scratch(struct trace_array * tr,struct trace_scratch * tscratch,unsigned int size)9301 static void setup_trace_scratch(struct trace_array *tr,
9302 struct trace_scratch *tscratch, unsigned int size)
9303 {
9304 struct trace_module_delta *module_delta;
9305 struct trace_mod_entry *entry;
9306 int i, nr_entries;
9307
9308 if (!tscratch)
9309 return;
9310
9311 tr->scratch = tscratch;
9312 tr->scratch_size = size;
9313
9314 if (tscratch->text_addr)
9315 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9316
9317 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9318 goto reset;
9319
9320 /* Check if each module name is a valid string */
9321 for (i = 0; i < tscratch->nr_entries; i++) {
9322 int n;
9323
9324 entry = &tscratch->entries[i];
9325
9326 for (n = 0; n < MODULE_NAME_LEN; n++) {
9327 if (entry->mod_name[n] == '\0')
9328 break;
9329 if (!isprint(entry->mod_name[n]))
9330 goto reset;
9331 }
9332 if (n == MODULE_NAME_LEN)
9333 goto reset;
9334 }
9335
9336 /* Sort the entries so that we can find appropriate module from address. */
9337 nr_entries = tscratch->nr_entries;
9338 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9339 mod_addr_comp, NULL, NULL);
9340
9341 if (IS_ENABLED(CONFIG_MODULES)) {
9342 module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
9343 if (!module_delta) {
9344 pr_info("module_delta allocation failed. Not able to decode module address.");
9345 goto reset;
9346 }
9347 init_rcu_head(&module_delta->rcu);
9348 } else
9349 module_delta = NULL;
9350 WRITE_ONCE(tr->module_delta, module_delta);
9351
9352 /* Scan modules to make text delta for modules. */
9353 module_for_each_mod(make_mod_delta, tr);
9354
9355 /* Set trace_clock as the same of the previous boot. */
9356 if (tscratch->clock_id != tr->clock_id) {
9357 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9358 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9359 pr_info("the previous trace_clock info is not valid.");
9360 goto reset;
9361 }
9362 }
9363 return;
9364 reset:
9365 /* Invalid trace modules */
9366 memset(tscratch, 0, size);
9367 }
9368
9369 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,unsigned long size)9370 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, unsigned long size)
9371 {
9372 enum ring_buffer_flags rb_flags;
9373 struct trace_scratch *tscratch;
9374 unsigned int scratch_size = 0;
9375
9376 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
9377
9378 buf->tr = tr;
9379
9380 if (tr->range_addr_start && tr->range_addr_size) {
9381 /* Add scratch buffer to handle 128 modules */
9382 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9383 tr->range_addr_start,
9384 tr->range_addr_size,
9385 struct_size(tscratch, entries, 128));
9386
9387 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9388 setup_trace_scratch(tr, tscratch, scratch_size);
9389
9390 /*
9391 * This is basically the same as a mapped buffer,
9392 * with the same restrictions.
9393 */
9394 tr->mapped++;
9395 } else {
9396 buf->buffer = ring_buffer_alloc(size, rb_flags);
9397 }
9398 if (!buf->buffer)
9399 return -ENOMEM;
9400
9401 buf->data = alloc_percpu(struct trace_array_cpu);
9402 if (!buf->data) {
9403 ring_buffer_free(buf->buffer);
9404 buf->buffer = NULL;
9405 return -ENOMEM;
9406 }
9407
9408 /* Allocate the first page for all buffers */
9409 set_buffer_entries(&tr->array_buffer,
9410 ring_buffer_size(tr->array_buffer.buffer, 0));
9411
9412 return 0;
9413 }
9414
free_trace_buffer(struct array_buffer * buf)9415 static void free_trace_buffer(struct array_buffer *buf)
9416 {
9417 if (buf->buffer) {
9418 ring_buffer_free(buf->buffer);
9419 buf->buffer = NULL;
9420 free_percpu(buf->data);
9421 buf->data = NULL;
9422 }
9423 }
9424
allocate_trace_buffers(struct trace_array * tr,unsigned long size)9425 static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
9426 {
9427 int ret;
9428
9429 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9430 if (ret)
9431 return ret;
9432
9433 #ifdef CONFIG_TRACER_SNAPSHOT
9434 /* Fix mapped buffer trace arrays do not have snapshot buffers */
9435 if (tr->range_addr_start)
9436 return 0;
9437
9438 ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
9439 allocate_snapshot ? size : 1);
9440 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9441 free_trace_buffer(&tr->array_buffer);
9442 return -ENOMEM;
9443 }
9444 tr->allocated_snapshot = allocate_snapshot;
9445
9446 allocate_snapshot = false;
9447 #endif
9448
9449 return 0;
9450 }
9451
free_trace_buffers(struct trace_array * tr)9452 static void free_trace_buffers(struct trace_array *tr)
9453 {
9454 if (!tr)
9455 return;
9456
9457 free_trace_buffer(&tr->array_buffer);
9458 kfree(tr->module_delta);
9459
9460 #ifdef CONFIG_TRACER_SNAPSHOT
9461 free_trace_buffer(&tr->snapshot_buffer);
9462 #endif
9463 }
9464
init_trace_flags_index(struct trace_array * tr)9465 static void init_trace_flags_index(struct trace_array *tr)
9466 {
9467 int i;
9468
9469 /* Used by the trace options files */
9470 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9471 tr->trace_flags_index[i] = i;
9472 }
9473
__update_tracer(struct trace_array * tr)9474 static int __update_tracer(struct trace_array *tr)
9475 {
9476 struct tracer *t;
9477 int ret = 0;
9478
9479 for (t = trace_types; t && !ret; t = t->next)
9480 ret = add_tracer(tr, t);
9481
9482 return ret;
9483 }
9484
__update_tracer_options(struct trace_array * tr)9485 static __init int __update_tracer_options(struct trace_array *tr)
9486 {
9487 struct tracers *t;
9488 int ret = 0;
9489
9490 list_for_each_entry(t, &tr->tracers, list) {
9491 ret = add_tracer_options(tr, t);
9492 if (ret < 0)
9493 break;
9494 }
9495
9496 return ret;
9497 }
9498
update_tracer_options(void)9499 static __init void update_tracer_options(void)
9500 {
9501 struct trace_array *tr;
9502
9503 guard(mutex)(&trace_types_lock);
9504 tracer_options_updated = true;
9505 list_for_each_entry(tr, &ftrace_trace_arrays, list)
9506 __update_tracer_options(tr);
9507 }
9508
9509 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9510 struct trace_array *trace_array_find(const char *instance)
9511 {
9512 struct trace_array *tr, *found = NULL;
9513
9514 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9515 if (tr->name && strcmp(tr->name, instance) == 0) {
9516 found = tr;
9517 break;
9518 }
9519 }
9520
9521 return found;
9522 }
9523
trace_array_find_get(const char * instance)9524 struct trace_array *trace_array_find_get(const char *instance)
9525 {
9526 struct trace_array *tr;
9527
9528 guard(mutex)(&trace_types_lock);
9529 tr = trace_array_find(instance);
9530 if (tr)
9531 tr->ref++;
9532
9533 return tr;
9534 }
9535
trace_array_create_dir(struct trace_array * tr)9536 static int trace_array_create_dir(struct trace_array *tr)
9537 {
9538 int ret;
9539
9540 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9541 if (!tr->dir)
9542 return -EINVAL;
9543
9544 ret = event_trace_add_tracer(tr->dir, tr);
9545 if (ret) {
9546 tracefs_remove(tr->dir);
9547 return ret;
9548 }
9549
9550 init_tracer_tracefs(tr, tr->dir);
9551 ret = __update_tracer(tr);
9552 if (ret) {
9553 event_trace_del_tracer(tr);
9554 tracefs_remove(tr->dir);
9555 return ret;
9556 }
9557 return 0;
9558 }
9559
/*
 * Allocate and register a new trace instance.
 * @name:             instance name (duplicated).
 * @systems:          optional list of event systems to expose (NULL = all).
 * @range_addr_start: start of a boot-reserved memory range, or 0.
 * @range_addr_size:  size of that range, or 0.
 *
 * Returns the new trace_array (with one reference held) or an ERR_PTR().
 * All callers hold trace_types_lock.
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* New instances inherit the global flags, minus per-instance ones. */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* Instances start out with the no-op tracer. */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is defaultly expanded */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		/* tracefs not ready yet: only register the events for now. */
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
9657
trace_array_create(const char * name)9658 static struct trace_array *trace_array_create(const char *name)
9659 {
9660 return trace_array_create_systems(name, NULL, 0, 0);
9661 }
9662
instance_mkdir(const char * name)9663 static int instance_mkdir(const char *name)
9664 {
9665 struct trace_array *tr;
9666 int ret;
9667
9668 guard(mutex)(&event_mutex);
9669 guard(mutex)(&trace_types_lock);
9670
9671 ret = -EEXIST;
9672 if (trace_array_find(name))
9673 return -EEXIST;
9674
9675 tr = trace_array_create(name);
9676
9677 ret = PTR_ERR_OR_ZERO(tr);
9678
9679 return ret;
9680 }
9681
#ifdef CONFIG_MMU
/*
 * Map @size bytes of physical memory starting at @start into a fresh
 * kernel virtual area with non-executable kernel page protections.
 * Returns the virtual address, or 0 on failure.  Used to access
 * boot-reserved persistent trace buffers.
 */
static u64 map_pages(unsigned long start, unsigned long size)
{
	unsigned long vmap_start, vmap_end;
	struct vm_struct *area;
	int ret;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return 0;

	vmap_start = (unsigned long) area->addr;
	vmap_end = vmap_start + size;

	ret = vmap_page_range(vmap_start, vmap_end,
			      start, pgprot_nx(PAGE_KERNEL));
	if (ret < 0) {
		free_vm_area(area);
		return 0;
	}

	return (u64)vmap_start;
}
#else
/* Without an MMU there is nothing to remap; report failure. */
static inline u64 map_pages(unsigned long start, unsigned long size)
{
	return 0;
}
#endif
9711
9712 /**
9713 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9714 * @name: The name of the trace array to be looked up/created.
9715 * @systems: A list of systems to create event directories for (NULL for all)
9716 *
9717 * Returns pointer to trace array with given name.
9718 * NULL, if it cannot be created.
9719 *
9720 * NOTE: This function increments the reference counter associated with the
9721 * trace array returned. This makes sure it cannot be freed while in use.
9722 * Use trace_array_put() once the trace array is no longer needed.
9723 * If the trace_array is to be freed, trace_array_destroy() needs to
9724 * be called after the trace_array_put(), or simply let user space delete
9725 * it from the tracefs instances directory. But until the
9726 * trace_array_put() is called, user space can not delete it.
9727 *
9728 */
trace_array_get_by_name(const char * name,const char * systems)9729 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9730 {
9731 struct trace_array *tr;
9732
9733 guard(mutex)(&event_mutex);
9734 guard(mutex)(&trace_types_lock);
9735
9736 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9737 if (tr->name && strcmp(tr->name, name) == 0) {
9738 tr->ref++;
9739 return tr;
9740 }
9741 }
9742
9743 tr = trace_array_create_systems(name, systems, 0, 0);
9744
9745 if (IS_ERR(tr))
9746 tr = NULL;
9747 else
9748 tr->ref++;
9749
9750 return tr;
9751 }
9752 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9753
__remove_instance(struct trace_array * tr)9754 static int __remove_instance(struct trace_array *tr)
9755 {
9756 int i;
9757
9758 /* Reference counter for a newly created trace array = 1. */
9759 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9760 return -EBUSY;
9761
9762 list_del(&tr->list);
9763
9764 if (printk_trace == tr)
9765 update_printk_trace(&global_trace);
9766
9767 /* Must be done before disabling all the flags */
9768 if (update_marker_trace(tr, 0))
9769 synchronize_rcu();
9770
9771 /* Disable all the flags that were enabled coming in */
9772 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9773 if ((1ULL << i) & ZEROED_TRACE_FLAGS)
9774 set_tracer_flag(tr, 1ULL << i, 0);
9775 }
9776
9777 tracing_set_nop(tr);
9778 clear_ftrace_function_probes(tr);
9779 event_trace_del_tracer(tr);
9780 ftrace_clear_pids(tr);
9781 ftrace_destroy_function_files(tr);
9782 tracefs_remove(tr->dir);
9783 free_percpu(tr->last_func_repeats);
9784 free_trace_buffers(tr);
9785 clear_tracing_err_log(tr);
9786 free_tracers(tr);
9787
9788 if (tr->range_name) {
9789 reserve_mem_release_by_name(tr->range_name);
9790 kfree(tr->range_name);
9791 }
9792 if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
9793 vfree((void *)tr->range_addr_start);
9794
9795 for (i = 0; i < tr->nr_topts; i++) {
9796 kfree(tr->topts[i].topts);
9797 }
9798 kfree(tr->topts);
9799
9800 free_cpumask_var(tr->pipe_cpumask);
9801 free_cpumask_var(tr->tracing_cpumask);
9802 kfree_const(tr->system_names);
9803 kfree(tr->name);
9804 kfree(tr);
9805
9806 return 0;
9807 }
9808
trace_array_destroy(struct trace_array * this_tr)9809 int trace_array_destroy(struct trace_array *this_tr)
9810 {
9811 struct trace_array *tr;
9812
9813 if (!this_tr)
9814 return -EINVAL;
9815
9816 guard(mutex)(&event_mutex);
9817 guard(mutex)(&trace_types_lock);
9818
9819
9820 /* Making sure trace array exists before destroying it. */
9821 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9822 if (tr == this_tr)
9823 return __remove_instance(tr);
9824 }
9825
9826 return -ENODEV;
9827 }
9828 EXPORT_SYMBOL_GPL(trace_array_destroy);
9829
instance_rmdir(const char * name)9830 static int instance_rmdir(const char *name)
9831 {
9832 struct trace_array *tr;
9833
9834 guard(mutex)(&event_mutex);
9835 guard(mutex)(&trace_types_lock);
9836
9837 tr = trace_array_find(name);
9838 if (!tr)
9839 return -ENODEV;
9840
9841 return __remove_instance(tr);
9842 }
9843
create_trace_instances(struct dentry * d_tracer)9844 static __init void create_trace_instances(struct dentry *d_tracer)
9845 {
9846 struct trace_array *tr;
9847
9848 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9849 instance_mkdir,
9850 instance_rmdir);
9851 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9852 return;
9853
9854 guard(mutex)(&event_mutex);
9855 guard(mutex)(&trace_types_lock);
9856
9857 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9858 if (!tr->name)
9859 continue;
9860 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9861 "Failed to create instance directory\n"))
9862 return;
9863 }
9864 }
9865
/*
 * Populate @d_tracer (an instance directory, or the tracefs top level
 * when NULL) with the control and output files for trace array @tr.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			  tr, &show_traces_fops);

	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
			  tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/* Cache the ftrace:print event used by trace_marker writes. */
	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default: wake readers when the buffer is half full. */
	tr->buffer_percent = 50;

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_percent_fops);

	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_syscall_buf_fops);

	create_trace_options_dir(tr);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

	/* Persistent (boot-mapped) buffers expose last_boot_info instead. */
	if (tr->range_addr_start) {
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	} else {
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif
	}

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
9951
9952 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
trace_automount(struct dentry * mntpt,void * ingore)9953 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9954 {
9955 struct vfsmount *mnt;
9956 struct file_system_type *type;
9957 struct fs_context *fc;
9958 int ret;
9959
9960 /*
9961 * To maintain backward compatibility for tools that mount
9962 * debugfs to get to the tracing facility, tracefs is automatically
9963 * mounted to the debugfs/tracing directory.
9964 */
9965 type = get_fs_type("tracefs");
9966 if (!type)
9967 return NULL;
9968
9969 fc = fs_context_for_submount(type, mntpt);
9970 put_filesystem(type);
9971 if (IS_ERR(fc))
9972 return ERR_CAST(fc);
9973
9974 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
9975
9976 ret = vfs_parse_fs_string(fc, "source", "tracefs");
9977 if (!ret)
9978 mnt = fc_mount(fc);
9979 else
9980 mnt = ERR_PTR(ret);
9981
9982 put_fs_context(fc);
9983 return mnt;
9984 }
9985 #endif
9986
9987 /**
9988 * tracing_init_dentry - initialize top level trace array
9989 *
9990 * This is called when creating files or directories in the tracing
9991 * directory. It is called via fs_initcall() by any of the boot up code
9992 * and expects to return the dentry of the top level tracing directory.
9993 */
tracing_init_dentry(void)9994 int tracing_init_dentry(void)
9995 {
9996 struct trace_array *tr = &global_trace;
9997
9998 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9999 pr_warn("Tracing disabled due to lockdown\n");
10000 return -EPERM;
10001 }
10002
10003 /* The top level trace array uses NULL as parent */
10004 if (tr->dir)
10005 return 0;
10006
10007 if (WARN_ON(!tracefs_initialized()))
10008 return -ENODEV;
10009
10010 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10011 /*
10012 * As there may still be users that expect the tracing
10013 * files to exist in debugfs/tracing, we must automount
10014 * the tracefs file system there, so older tools still
10015 * work with the newer kernel.
10016 */
10017 tr->dir = debugfs_create_automount("tracing", NULL,
10018 trace_automount, NULL);
10019 #endif
10020
10021 return 0;
10022 }
10023
/* Linker-defined bounds of the built-in trace eval (enum) map section. */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Workqueue and work items used to defer boot-time tracefs setup. */
struct workqueue_struct *trace_init_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
10030
eval_map_work_func(struct work_struct * work)10031 static void __init eval_map_work_func(struct work_struct *work)
10032 {
10033 int len;
10034
10035 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10036 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10037 }
10038
trace_eval_init(void)10039 static int __init trace_eval_init(void)
10040 {
10041 INIT_WORK(&eval_map_work, eval_map_work_func);
10042
10043 trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
10044 if (!trace_init_wq) {
10045 pr_err("Unable to allocate trace_init_wq\n");
10046 /* Do work here */
10047 eval_map_work_func(&eval_map_work);
10048 return -ENOMEM;
10049 }
10050
10051 queue_work(trace_init_wq, &eval_map_work);
10052 return 0;
10053 }
10054
10055 subsys_initcall(trace_eval_init);
10056
trace_eval_sync(void)10057 static int __init trace_eval_sync(void)
10058 {
10059 /* Make sure the eval map updates are finished */
10060 if (trace_init_wq)
10061 destroy_workqueue(trace_init_wq);
10062 return 0;
10063 }
10064
10065 late_initcall_sync(trace_eval_sync);
10066
10067
10068 #ifdef CONFIG_MODULES
10069
module_exists(const char * module)10070 bool module_exists(const char *module)
10071 {
10072 /* All modules have the symbol __this_module */
10073 static const char this_mod[] = "__this_module";
10074 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10075 unsigned long val;
10076 int n;
10077
10078 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10079
10080 if (n > sizeof(modname) - 1)
10081 return false;
10082
10083 val = module_kallsyms_lookup_name(modname);
10084 return val != 0;
10085 }
10086
trace_module_add_evals(struct module * mod)10087 static void trace_module_add_evals(struct module *mod)
10088 {
10089 /*
10090 * Modules with bad taint do not have events created, do
10091 * not bother with enums either.
10092 */
10093 if (trace_module_has_bad_taint(mod))
10094 return;
10095
10096 /* Even if no trace_evals, this need to sanitize field types. */
10097 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10098 }
10099
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * MODULE_STATE_GOING helper: unlink and free @mod's section of the
 * global trace_eval_maps list.  The list is organized as per-module
 * sections: a "head" node, the map entries, then a "tail" node whose
 * next pointer links to the following module's head.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	/* Walk section by section looking for @mod's head node. */
	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		return;

	/* Splice the whole section out of the list and free it. */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10129
trace_module_record(struct module * mod,bool add)10130 static void trace_module_record(struct module *mod, bool add)
10131 {
10132 struct trace_array *tr;
10133 unsigned long flags;
10134
10135 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10136 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10137 /* Update any persistent trace array that has already been started */
10138 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10139 guard(mutex)(&scratch_mutex);
10140 save_mod(mod, tr);
10141 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10142 /* Update delta if the module loaded in previous boot */
10143 make_mod_delta(mod, tr);
10144 }
10145 }
10146 }
10147
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)10148 static int trace_module_notify(struct notifier_block *self,
10149 unsigned long val, void *data)
10150 {
10151 struct module *mod = data;
10152
10153 switch (val) {
10154 case MODULE_STATE_COMING:
10155 trace_module_add_evals(mod);
10156 trace_module_record(mod, true);
10157 break;
10158 case MODULE_STATE_GOING:
10159 trace_module_remove_evals(mod);
10160 trace_module_record(mod, false);
10161 break;
10162 }
10163
10164 return NOTIFY_OK;
10165 }
10166
/* Hook module load/unload for events, eval maps and address deltas. */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
10171 #endif /* CONFIG_MODULES */
10172
tracer_init_tracefs_work_func(struct work_struct * work)10173 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10174 {
10175
10176 event_trace_init();
10177
10178 init_tracer_tracefs(&global_trace, NULL);
10179 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10180
10181 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10182 &global_trace, &tracing_thresh_fops);
10183
10184 trace_create_file("README", TRACE_MODE_READ, NULL,
10185 NULL, &tracing_readme_fops);
10186
10187 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10188 NULL, &tracing_saved_cmdlines_fops);
10189
10190 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10191 NULL, &tracing_saved_cmdlines_size_fops);
10192
10193 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10194 NULL, &tracing_saved_tgids_fops);
10195
10196 trace_create_eval_file(NULL);
10197
10198 #ifdef CONFIG_MODULES
10199 register_module_notifier(&trace_module_nb);
10200 #endif
10201
10202 #ifdef CONFIG_DYNAMIC_FTRACE
10203 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10204 NULL, &tracing_dyn_info_fops);
10205 #endif
10206
10207 create_trace_instances(NULL);
10208
10209 update_tracer_options();
10210 }
10211
tracer_init_tracefs(void)10212 static __init int tracer_init_tracefs(void)
10213 {
10214 int ret;
10215
10216 trace_access_lock_init();
10217
10218 ret = tracing_init_dentry();
10219 if (ret)
10220 return 0;
10221
10222 if (trace_init_wq) {
10223 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10224 queue_work(trace_init_wq, &tracerfs_init_work);
10225 } else {
10226 tracer_init_tracefs_work_func(NULL);
10227 }
10228
10229 if (rv_init_interface())
10230 pr_err("RV: Error while creating the RV interface\n");
10231
10232 return 0;
10233 }
10234
10235 fs_initcall(tracer_init_tracefs);
10236
static int trace_die_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *unused);

/* Dump the trace buffers on panic; runs early (see comment below). */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Dump the trace buffers on an oops (DIE_OOPS events only). */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
10249
10250 /*
10251 * The idea is to execute the following die/panic callback early, in order
10252 * to avoid showing irrelevant information in the trace (like other panic
10253 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10254 * warnings get disabled (to prevent potential log flooding).
10255 */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10256 static int trace_die_panic_handler(struct notifier_block *self,
10257 unsigned long ev, void *unused)
10258 {
10259 if (!ftrace_dump_on_oops_enabled())
10260 return NOTIFY_DONE;
10261
10262 /* The die notifier requires DIE_OOPS to trigger */
10263 if (self == &trace_die_notifier && ev != DIE_OOPS)
10264 return NOTIFY_DONE;
10265
10266 ftrace_dump(DUMP_PARAM);
10267
10268 return NOTIFY_DONE;
10269 }
10270
10271 /*
10272 * printk is set to max of 1024, we really don't need it that big.
10273 * Nothing should be printing 1000 characters anyway.
10274 */
10275 #define TRACE_MAX_PRINT 1000
10276
10277 /*
10278 * Define here KERN_TRACE so that we have one place to modify
10279 * it if we decide to change what log level the ftrace dump
10280 * should be at.
10281 */
10282 #define KERN_TRACE KERN_EMERG
10283
/*
 * Print the contents of @s to the console at KERN_TRACE level and
 * reset it.  Used by the ftrace dump path; output is clamped to
 * TRACE_MAX_PRINT characters per call.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
10306
/*
 * trace_init_iter - initialize a trace_iterator for reading @tr
 * @iter: the iterator to set up
 * @tr: the trace array it will iterate over
 *
 * Points @iter at all CPUs of @tr's buffer and sets the iterator
 * flags from the buffer/clock state. Uses the static temp/fmt
 * buffers because this runs in contexts (oops/panic dumps) where
 * kmalloc cannot be used.
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	/* Give the current tracer a chance to set up the iterator. */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
10331
/*
 * trace_init_global_iter - initialize @iter to read the global trace
 * array (see trace_init_iter() for details).
 */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
10336
/*
 * ftrace_dump_one - dump one trace array's ring buffer to the console
 * @tr: the trace array to dump
 * @dump_mode: DUMP_ORIG dumps only the current CPU's buffer, any
 *             other mode dumps all CPUs
 *
 * Runs with local interrupts disabled and with recording disabled so
 * the dump is not polluted by new events. Tracing is turned off and
 * left off afterwards. Not reentrant (uses a static iterator);
 * callers serialize via dump_running in ftrace_dump().
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enable */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		/* Use the latency format so each line shows CPU/irq state. */
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* Dumping can take a while; keep the NMI watchdog quiet. */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the user-symbol flag cleared above. */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
10423
/*
 * ftrace_dump_by_param - dump buffers per the ftrace_dump_on_oops parameter
 *
 * Parses the comma-separated ftrace_dump_on_oops string. The first
 * token may be the legacy global setting: "0" (nothing), "1" (dump
 * all CPUs of the global buffer), or "2"/"orig_cpu" (dump only the
 * oops CPU). Every other token names an instance, optionally suffixed
 * with "=2" or "=orig_cpu" to restrict that instance's dump to the
 * oops CPU; otherwise all of its CPUs are dumped.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Parse a copy, since strsep() modifies the string. */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* "name=mode": split off the per-instance dump mode. */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			  !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
10464
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10465 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10466 {
10467 static atomic_t dump_running;
10468
10469 /* Only allow one dump user at a time. */
10470 if (atomic_inc_return(&dump_running) != 1) {
10471 atomic_dec(&dump_running);
10472 return;
10473 }
10474
10475 switch (oops_dump_mode) {
10476 case DUMP_ALL:
10477 ftrace_dump_one(&global_trace, DUMP_ALL);
10478 break;
10479 case DUMP_ORIG:
10480 ftrace_dump_one(&global_trace, DUMP_ORIG);
10481 break;
10482 case DUMP_PARAM:
10483 ftrace_dump_by_param();
10484 break;
10485 case DUMP_NONE:
10486 break;
10487 default:
10488 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10489 ftrace_dump_one(&global_trace, DUMP_ALL);
10490 }
10491
10492 atomic_dec(&dump_running);
10493 }
10494 EXPORT_SYMBOL_GPL(ftrace_dump);
10495
10496 #define WRITE_BUFSIZE 4096
10497
/*
 * trace_parse_run_command - run a callback on each line of a user write
 * @file: file being written to (unused here)
 * @buffer: user-space buffer holding the commands
 * @count: number of bytes to consume from @buffer
 * @ppos: file position (unused here)
 * @createfn: called with each parsed, NUL-terminated line
 *
 * Copies the user data in WRITE_BUFSIZE chunks, splits it on
 * newlines, strips '#' comments, and invokes @createfn per line.
 * A line that straddles a chunk boundary is re-read at the start of
 * the next chunk; a single line longer than WRITE_BUFSIZE - 2 is
 * rejected with -EINVAL.
 *
 * Returns the number of bytes consumed, or a negative errno
 * (including the first non-zero return value of @createfn).
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating NUL. */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Partial line at the end of the
					 * chunk: don't count it as done, so
					 * it is re-read on the next pass.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
10556
10557 #ifdef CONFIG_TRACER_SNAPSHOT
tr_needs_alloc_snapshot(const char * name)10558 __init static bool tr_needs_alloc_snapshot(const char *name)
10559 {
10560 char *test;
10561 int len = strlen(name);
10562 bool ret;
10563
10564 if (!boot_snapshot_index)
10565 return false;
10566
10567 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10568 boot_snapshot_info[len] == '\t')
10569 return true;
10570
10571 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10572 if (!test)
10573 return false;
10574
10575 sprintf(test, "\t%s\t", name);
10576 ret = strstr(boot_snapshot_info, test) == NULL;
10577 kfree(test);
10578 return ret;
10579 }
10580
do_allocate_snapshot(const char * name)10581 __init static void do_allocate_snapshot(const char *name)
10582 {
10583 if (!tr_needs_alloc_snapshot(name))
10584 return;
10585
10586 /*
10587 * When allocate_snapshot is set, the next call to
10588 * allocate_trace_buffers() (called by trace_array_get_by_name())
10589 * will allocate the snapshot buffer. That will also clear
10590 * this flag.
10591 */
10592 allocate_snapshot = true;
10593 }
10594 #else
/* No-op stub when CONFIG_TRACER_SNAPSHOT is disabled. */
static inline void do_allocate_snapshot(const char *name) { }
10596 #endif
10597
backup_instance_area(const char * backup,unsigned long * addr,phys_addr_t * size)10598 __init static int backup_instance_area(const char *backup,
10599 unsigned long *addr, phys_addr_t *size)
10600 {
10601 struct trace_array *backup_tr;
10602 void *allocated_vaddr = NULL;
10603
10604 backup_tr = trace_array_get_by_name(backup, NULL);
10605 if (!backup_tr) {
10606 pr_warn("Tracing: Instance %s is not found.\n", backup);
10607 return -ENOENT;
10608 }
10609
10610 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
10611 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
10612 trace_array_put(backup_tr);
10613 return -EINVAL;
10614 }
10615
10616 *size = backup_tr->range_addr_size;
10617
10618 allocated_vaddr = vzalloc(*size);
10619 if (!allocated_vaddr) {
10620 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
10621 backup, (unsigned long)*size);
10622 trace_array_put(backup_tr);
10623 return -ENOMEM;
10624 }
10625
10626 memcpy(allocated_vaddr,
10627 (void *)backup_tr->range_addr_start, (size_t)*size);
10628 *addr = (unsigned long)allocated_vaddr;
10629
10630 trace_array_put(backup_tr);
10631 return 0;
10632 }
10633
/*
 * enable_instances - create trace instances requested on the boot
 * command line.
 *
 * boot_instance_info holds tab-terminated entries that, per the
 * parsing below, look like:
 *
 *   name[=backup][^flag[^flag...]][@start:size | @reserve_name][,events...]
 *
 * Flags are "traceoff" and "printk"/"traceprintk"/"trace_printk".
 * "@start:size" maps the instance onto raw physical memory,
 * "@reserve_name" resolves a reserve_mem region by name, and
 * "=backup" copies an existing boot-mapped instance into vmalloc'ed
 * memory. Remaining comma-separated tokens are events to enable in
 * the new instance.
 */
__init static void enable_instances(void)
{
	struct trace_array *tr;
	bool memmap_area = false;
	char *curr_str;
	char *name;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		phys_addr_t start = 0;
		phys_addr_t size = 0;
		unsigned long addr = 0;
		bool traceprintk = false;
		bool traceoff = false;
		char *flag_delim;
		char *addr_delim;
		char *rname __free(kfree) = NULL;
		char *backup;

		tok = strsep(&curr_str, ",");

		/* "name=backup" copies instance @backup into this one. */
		name = strsep(&tok, "=");
		backup = tok;

		flag_delim = strchr(name, '^');
		addr_delim = strchr(name, '@');

		if (addr_delim)
			*addr_delim++ = '\0';

		if (flag_delim)
			*flag_delim++ = '\0';

		if (backup) {
			if (backup_instance_area(backup, &addr, &size) < 0)
				continue;
		}

		if (flag_delim) {
			char *flag;

			while ((flag = strsep(&flag_delim, "^"))) {
				if (strcmp(flag, "traceoff") == 0) {
					traceoff = true;
				} else if ((strcmp(flag, "printk") == 0) ||
					   (strcmp(flag, "traceprintk") == 0) ||
					   (strcmp(flag, "trace_printk") == 0)) {
					traceprintk = true;
				} else {
					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
				}
			}
		}

		tok = addr_delim;
		if (tok && isdigit(*tok)) {
			/* "@start:size" - raw physical address and size. */
			start = memparse(tok, &tok);
			if (!start) {
				pr_warn("Tracing: Invalid boot instance address for %s\n",
					name);
				continue;
			}
			if (*tok != ':') {
				pr_warn("Tracing: No size specified for instance %s\n", name);
				continue;
			}
			tok++;
			size = memparse(tok, &tok);
			if (!size) {
				pr_warn("Tracing: Invalid boot instance size for %s\n",
					name);
				continue;
			}
			/*
			 * NOTE(review): memmap_area is declared outside the
			 * loop and never reset per entry, so a reserve_mem
			 * entry that follows a memmap entry would also take
			 * the map_pages() path and be flagged
			 * TRACE_ARRAY_FL_MEMMAP below - confirm intended.
			 */
			memmap_area = true;
		} else if (tok) {
			/* "@name" - look up a reserve_mem region by name. */
			if (!reserve_mem_find_by_name(tok, &start, &size)) {
				start = 0;
				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
				continue;
			}
			rname = kstrdup(tok, GFP_KERNEL);
		}

		if (start) {
			/* Start and size must be page aligned */
			if (start & ~PAGE_MASK) {
				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
				continue;
			}
			if (size & ~PAGE_MASK) {
				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
				continue;
			}

			if (memmap_area)
				addr = map_pages(start, size);
			else
				addr = (unsigned long)phys_to_virt(start);
			if (addr) {
				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
					name, &start, (unsigned long)size);
			} else {
				pr_warn("Tracing: Failed to map boot instance %s\n", name);
				continue;
			}
		} else {
			/* Only non mapped buffers have snapshot buffers */
			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
				do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/*
		 * memmap'd buffers can not be freed.
		 */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		/*
		 * Backup buffers can be freed but need vfree().
		 */
		if (backup)
			tr->flags |= TRACE_ARRAY_FL_VMALLOC;

		if (start || backup) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			tr->range_name = no_free_ptr(rname);
		}

		/* Remaining comma-separated tokens are events to enable. */
		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
	}
}
10787
/*
 * tracer_alloc_buffers - allocate and initialize the global trace array
 *
 * Called from early_trace_init(). Sets up the cpumasks, the ring
 * buffer CPU hotplug state, the temp buffer used by event triggers,
 * the saved-cmdlines buffer and the global_trace ring buffers, then
 * registers the panic/die notifiers and the nop tracer, and applies
 * trace-related boot options.
 *
 * Returns 0 on success or a negative errno, unwinding all earlier
 * allocations on failure (labels in reverse allocation order).
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
10931
10932 #ifdef CONFIG_FUNCTION_TRACER
10933 /* Used to set module cached ftrace filtering at boot up */
/* Return the top-level global trace array. */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
10938 #endif
10939
ftrace_boot_snapshot(void)10940 void __init ftrace_boot_snapshot(void)
10941 {
10942 #ifdef CONFIG_TRACER_SNAPSHOT
10943 struct trace_array *tr;
10944
10945 if (!snapshot_at_boot)
10946 return;
10947
10948 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10949 if (!tr->allocated_snapshot)
10950 continue;
10951
10952 tracing_snapshot_instance(tr);
10953 trace_array_puts(tr, "** Boot snapshot taken **\n");
10954 }
10955 #endif
10956 }
10957
/*
 * early_trace_init - first stage of tracing initialization
 *
 * Called very early in boot. If tracepoint_printk was requested,
 * allocates its iterator and enables its static key (falling back
 * to normal tracing on allocation failure), then allocates the
 * trace buffers and initializes the output event formats.
 */
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
		/* On allocation failure, just disable tracepoint_printk. */
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}
10972
/*
 * trace_init - second stage of tracing initialization
 *
 * Initializes the trace events and creates any boot-time instances
 * requested on the kernel command line.
 */
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}
10980
/*
 * clear_boot_tracer - drop a bootup tracer that never registered
 */
__init static void clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}
10997
10998 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10999 __init static void tracing_set_default_clock(void)
11000 {
11001 /* sched_clock_stable() is determined in late_initcall */
11002 if (!trace_boot_clock && !sched_clock_stable()) {
11003 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11004 pr_warn("Can not set tracing clock due to lockdown\n");
11005 return;
11006 }
11007
11008 printk(KERN_WARNING
11009 "Unstable clock detected, switching default tracing clock to \"global\"\n"
11010 "If you want to keep using the local clock, then add:\n"
11011 " \"trace_clock=local\"\n"
11012 "on the kernel command line\n");
11013 tracing_set_clock(&global_trace, "global");
11014 }
11015 }
11016 #else
/* No-op stub when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not set. */
static inline void tracing_set_default_clock(void) { }
11018 #endif
11019
/*
 * late_trace_init - late_initcall stage of tracing setup
 *
 * Handles boot options that can only be resolved late: stopping
 * tracepoint_printk after boot if requested, turning tracing off
 * after boot, choosing the default clock (which needs
 * sched_clock_stable()), and clearing a bootup tracer that never
 * registered.
 */
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}
11034
11035 late_initcall_sync(late_trace_init);
11036