1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58
59 #include "trace.h"
60 #include "trace_output.h"
61
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64 * We need to change this state when a selftest is running.
65 * A selftest will look into the ring-buffer to count the
66 * entries inserted during the selftest, although concurrent
67 * insertions into the ring-buffer, such as trace_printk, could occur
68 * at the same time, giving false positive or negative results.
69 */
70 static bool __read_mostly tracing_selftest_running;
71
72 /*
73 * If boot-time tracing including tracers/events via kernel cmdline
74 * is running, we do not want to run SELFTEST.
75 */
76 bool __read_mostly tracing_selftest_disabled;
77
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 if (!tracing_selftest_disabled) {
81 tracing_selftest_disabled = true;
82 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 }
84 }
85 #else
86 #define tracing_selftest_running 0
87 #define tracing_selftest_disabled 0
88 #endif
89
90 /* Pipe tracepoints to printk */
91 static struct trace_iterator *tracepoint_print_iter;
92 int tracepoint_printk;
93 static bool tracepoint_printk_stop_on_boot __initdata;
94 static bool traceoff_after_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* Store tracers and their flags per instance */
98 struct tracers {
99 struct list_head list;
100 struct tracer *tracer;
101 struct tracer_flags *flags;
102 };
103
104 /*
105 * To prevent the comm cache from being overwritten when no
106 * tracing is active, only save the comm when a trace event
107 * occurred.
108 */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112 * Kill all tracing for good (never come back).
113 * It is initialized to 1 but will turn to zero if the initialization
114 * of the tracer is successful. But that is the only place that sets
115 * this back to zero.
116 */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly tracing_buffer_mask;
120
121 #define MAX_TRACER_SIZE 100
122 /*
123 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124 *
125 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126 * is set, then ftrace_dump is called. This will output the contents
127 * of the ftrace buffers to the console. This is very useful for
128 * capturing traces that lead to crashes and outputting them to a
129 * serial console.
130 *
131 * It is off by default, but you can enable it either by specifying
132 * "ftrace_dump_on_oops" on the kernel command line, or by setting
133 * /proc/sys/kernel/ftrace_dump_on_oops
134 * Set 1 if you want to dump buffers of all CPUs
135 * Set 2 if you want to dump the buffer of the CPU that triggered oops
136 * Set instance name if you want to dump the specific trace instance
137 * Multiple instance dump is also supported, and instances are separated
138 * by commas.
139 */
140 /* Set to string format zero to disable by default */
141 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
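/*
 * Illustrative examples only, based on the parsing in
 * set_ftrace_dump_on_oops() below ("foo" is a made-up instance name;
 * the exact interpretation of the stored string happens in the dump code):
 *
 *	ftrace_dump_on_oops		# stores "1": dump all CPUs
 *	ftrace_dump_on_oops=2		# stores "2": dump the CPU that oopsed
 *	ftrace_dump_on_oops=foo		# stores an instance name to dump
 */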
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 static int __disable_trace_on_warning;
145
146 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
147 void *buffer, size_t *lenp, loff_t *ppos);
148 static const struct ctl_table trace_sysctl_table[] = {
149 {
150 .procname = "ftrace_dump_on_oops",
151 .data = &ftrace_dump_on_oops,
152 .maxlen = MAX_TRACER_SIZE,
153 .mode = 0644,
154 .proc_handler = proc_dostring,
155 },
156 {
157 .procname = "traceoff_on_warning",
158 .data = &__disable_trace_on_warning,
159 .maxlen = sizeof(__disable_trace_on_warning),
160 .mode = 0644,
161 .proc_handler = proc_dointvec,
162 },
163 {
164 .procname = "tracepoint_printk",
165 .data = &tracepoint_printk,
166 .maxlen = sizeof(tracepoint_printk),
167 .mode = 0644,
168 .proc_handler = tracepoint_printk_sysctl,
169 },
170 };
171
172 static int __init init_trace_sysctls(void)
173 {
174 register_sysctl_init("kernel", trace_sysctl_table);
175 return 0;
176 }
177 subsys_initcall(init_trace_sysctls);
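/*
 * The sysctls registered above can then be changed at run time, e.g.
 * (illustrative shell usage, not part of this file):
 *
 *	echo 1 > /proc/sys/kernel/traceoff_on_warning
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 */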
178
179 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
180 /* Map of enums to their values, for "eval_map" file */
181 struct trace_eval_map_head {
182 struct module *mod;
183 unsigned long length;
184 };
185
186 union trace_eval_map_item;
187
188 struct trace_eval_map_tail {
189 /*
190 * "end" is first and points to NULL as it must be different
191 * than "mod" or "eval_string"
192 */
193 union trace_eval_map_item *next;
194 const char *end; /* points to NULL */
195 };
196
197 static DEFINE_MUTEX(trace_eval_mutex);
198
199 /*
200 * The trace_eval_maps are saved in an array with two extra elements,
201 * one at the beginning, and one at the end. The beginning item contains
202 * the count of the saved maps (head.length), and the module they
203 * belong to if not built in (head.mod). The ending item contains a
204 * pointer to the next array of saved eval_map items.
205 */
206 union trace_eval_map_item {
207 struct trace_eval_map map;
208 struct trace_eval_map_head head;
209 struct trace_eval_map_tail tail;
210 };
211
212 static union trace_eval_map_item *trace_eval_maps;
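/*
 * Rough layout sketch of one saved array (see the comment above; sizes
 * and names are only illustrative):
 *
 *	[ head: mod, length ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail: next, end=NULL ]
 *
 * where "next" chains to the next saved array of eval_map items, if any.
 */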
213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
214
215 int tracing_set_tracer(struct trace_array *tr, const char *buf);
216 static void ftrace_trace_userstack(struct trace_array *tr,
217 struct trace_buffer *buffer,
218 unsigned int trace_ctx);
219
220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
221 static char *default_bootup_tracer;
222
223 static bool allocate_snapshot;
224 static bool snapshot_at_boot;
225
226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
227 static int boot_instance_index;
228
229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_snapshot_index;
231
232 static int __init set_cmdline_ftrace(char *str)
233 {
234 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
235 default_bootup_tracer = bootup_tracer_buf;
236 /* We are using ftrace early, expand it */
237 trace_set_ring_buffer_expanded(NULL);
238 return 1;
239 }
240 __setup("ftrace=", set_cmdline_ftrace);
241
242 int ftrace_dump_on_oops_enabled(void)
243 {
244 if (!strcmp("0", ftrace_dump_on_oops))
245 return 0;
246 else
247 return 1;
248 }
249
250 static int __init set_ftrace_dump_on_oops(char *str)
251 {
252 if (!*str) {
253 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
254 return 1;
255 }
256
257 if (*str == ',') {
258 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
259 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
260 return 1;
261 }
262
263 if (*str++ == '=') {
264 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
265 return 1;
266 }
267
268 return 0;
269 }
270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
271
272 static int __init stop_trace_on_warning(char *str)
273 {
274 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
275 __disable_trace_on_warning = 1;
276 return 1;
277 }
278 __setup("traceoff_on_warning", stop_trace_on_warning);
279
280 static int __init boot_alloc_snapshot(char *str)
281 {
282 char *slot = boot_snapshot_info + boot_snapshot_index;
283 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
284 int ret;
285
286 if (str[0] == '=') {
287 str++;
288 if (strlen(str) >= left)
289 return -1;
290
291 ret = snprintf(slot, left, "%s\t", str);
292 boot_snapshot_index += ret;
293 } else {
294 allocate_snapshot = true;
295 /* We also need the main ring buffer expanded */
296 trace_set_ring_buffer_expanded(NULL);
297 }
298 return 1;
299 }
300 __setup("alloc_snapshot", boot_alloc_snapshot);
301
302
303 static int __init boot_snapshot(char *str)
304 {
305 snapshot_at_boot = true;
306 boot_alloc_snapshot(str);
307 return 1;
308 }
309 __setup("ftrace_boot_snapshot", boot_snapshot);
310
311
312 static int __init boot_instance(char *str)
313 {
314 char *slot = boot_instance_info + boot_instance_index;
315 int left = sizeof(boot_instance_info) - boot_instance_index;
316 int ret;
317
318 if (strlen(str) >= left)
319 return -1;
320
321 ret = snprintf(slot, left, "%s\t", str);
322 boot_instance_index += ret;
323
324 return 1;
325 }
326 __setup("trace_instance=", boot_instance);
327
328
329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
330
331 static int __init set_trace_boot_options(char *str)
332 {
333 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
334 return 1;
335 }
336 __setup("trace_options=", set_trace_boot_options);
337
338 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
339 static char *trace_boot_clock __initdata;
340
341 static int __init set_trace_boot_clock(char *str)
342 {
343 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
344 trace_boot_clock = trace_boot_clock_buf;
345 return 1;
346 }
347 __setup("trace_clock=", set_trace_boot_clock);
348
349 static int __init set_tracepoint_printk(char *str)
350 {
351 /* Ignore the "tp_printk_stop_on_boot" param */
352 if (*str == '_')
353 return 0;
354
355 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
356 tracepoint_printk = 1;
357 return 1;
358 }
359 __setup("tp_printk", set_tracepoint_printk);
360
361 static int __init set_tracepoint_printk_stop(char *str)
362 {
363 tracepoint_printk_stop_on_boot = true;
364 return 1;
365 }
366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
367
368 static int __init set_traceoff_after_boot(char *str)
369 {
370 traceoff_after_boot = true;
371 return 1;
372 }
373 __setup("traceoff_after_boot", set_traceoff_after_boot);
374
375 unsigned long long ns2usecs(u64 nsec)
376 {
377 nsec += 500;
378 do_div(nsec, 1000);
379 return nsec;
380 }
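/*
 * Example: ns2usecs(1500) == 2 and ns2usecs(1499) == 1, i.e. the +500
 * above rounds to the nearest microsecond instead of truncating.
 */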
381
382 static void
383 trace_process_export(struct trace_export *export,
384 struct ring_buffer_event *event, int flag)
385 {
386 struct trace_entry *entry;
387 unsigned int size = 0;
388
389 if (export->flags & flag) {
390 entry = ring_buffer_event_data(event);
391 size = ring_buffer_event_length(event);
392 export->write(export, entry, size);
393 }
394 }
395
396 static DEFINE_MUTEX(ftrace_export_lock);
397
398 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
399
400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
403
404 static inline void ftrace_exports_enable(struct trace_export *export)
405 {
406 if (export->flags & TRACE_EXPORT_FUNCTION)
407 static_branch_inc(&trace_function_exports_enabled);
408
409 if (export->flags & TRACE_EXPORT_EVENT)
410 static_branch_inc(&trace_event_exports_enabled);
411
412 if (export->flags & TRACE_EXPORT_MARKER)
413 static_branch_inc(&trace_marker_exports_enabled);
414 }
415
416 static inline void ftrace_exports_disable(struct trace_export *export)
417 {
418 if (export->flags & TRACE_EXPORT_FUNCTION)
419 static_branch_dec(&trace_function_exports_enabled);
420
421 if (export->flags & TRACE_EXPORT_EVENT)
422 static_branch_dec(&trace_event_exports_enabled);
423
424 if (export->flags & TRACE_EXPORT_MARKER)
425 static_branch_dec(&trace_marker_exports_enabled);
426 }
427
428 static void ftrace_exports(struct ring_buffer_event *event, int flag)
429 {
430 struct trace_export *export;
431
432 guard(preempt_notrace)();
433
434 export = rcu_dereference_raw_check(ftrace_exports_list);
435 while (export) {
436 trace_process_export(export, event, flag);
437 export = rcu_dereference_raw_check(export->next);
438 }
439 }
440
441 static inline void
442 add_trace_export(struct trace_export **list, struct trace_export *export)
443 {
444 rcu_assign_pointer(export->next, *list);
445 /*
446 * We are adding export to the list, but another
447 * CPU might be walking that list. We need to make sure
448 * the export->next pointer is valid before another CPU sees
449 * the export pointer included in the list.
450 */
451 rcu_assign_pointer(*list, export);
452 }
453
454 static inline int
455 rm_trace_export(struct trace_export **list, struct trace_export *export)
456 {
457 struct trace_export **p;
458
459 for (p = list; *p != NULL; p = &(*p)->next)
460 if (*p == export)
461 break;
462
463 if (*p != export)
464 return -1;
465
466 rcu_assign_pointer(*p, (*p)->next);
467
468 return 0;
469 }
470
471 static inline void
472 add_ftrace_export(struct trace_export **list, struct trace_export *export)
473 {
474 ftrace_exports_enable(export);
475
476 add_trace_export(list, export);
477 }
478
479 static inline int
480 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
481 {
482 int ret;
483
484 ret = rm_trace_export(list, export);
485 ftrace_exports_disable(export);
486
487 return ret;
488 }
489
490 int register_ftrace_export(struct trace_export *export)
491 {
492 if (WARN_ON_ONCE(!export->write))
493 return -1;
494
495 guard(mutex)(&ftrace_export_lock);
496
497 add_ftrace_export(&ftrace_exports_list, export);
498
499 return 0;
500 }
501 EXPORT_SYMBOL_GPL(register_ftrace_export);
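/*
 * Minimal usage sketch (illustrative only, not part of this file). The
 * names below are made up; the ->write() arguments mirror the call in
 * trace_process_export() above:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// push the raw trace entry to some external transport
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */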
502
503 int unregister_ftrace_export(struct trace_export *export)
504 {
505 guard(mutex)(&ftrace_export_lock);
506 return rm_ftrace_export(&ftrace_exports_list, export);
507 }
508 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
509
510 /* trace_flags holds trace_options default values */
511 #define TRACE_DEFAULT_FLAGS \
512 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \
513 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \
514 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \
515 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \
516 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \
517 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \
518 TRACE_ITER(COPY_MARKER))
519
520 /* trace_options that are only supported by global_trace */
521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \
522 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \
523 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
524
525 /* trace_flags that are default zero for instances */
526 #define ZEROED_TRACE_FLAGS \
527 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
528 TRACE_ITER(COPY_MARKER))
529
530 /*
531 * The global_trace is the descriptor that holds the top-level tracing
532 * buffers for the live tracing.
533 */
534 static struct trace_array global_trace = {
535 .trace_flags = TRACE_DEFAULT_FLAGS,
536 };
537
538 static struct trace_array *printk_trace = &global_trace;
539
540 /* List of trace_arrays interested in the top level trace_marker */
541 static LIST_HEAD(marker_copies);
542
543 static __always_inline bool printk_binsafe(struct trace_array *tr)
544 {
545 /*
546 * The binary format of trace_printk can cause a crash if used
547 * by a buffer from another boot. Force the use of the
548 * non binary version of trace_printk if the trace_printk
549 * buffer is a boot mapped ring buffer.
550 */
551 return !(tr->flags & TRACE_ARRAY_FL_BOOT);
552 }
553
554 static void update_printk_trace(struct trace_array *tr)
555 {
556 if (printk_trace == tr)
557 return;
558
559 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
560 printk_trace = tr;
561 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
562 }
563
564 /* Returns true if the status of tr changed */
565 static bool update_marker_trace(struct trace_array *tr, int enabled)
566 {
567 lockdep_assert_held(&event_mutex);
568
569 if (enabled) {
570 if (!list_empty(&tr->marker_list))
571 return false;
572
573 list_add_rcu(&tr->marker_list, &marker_copies);
574 tr->trace_flags |= TRACE_ITER(COPY_MARKER);
575 return true;
576 }
577
578 if (list_empty(&tr->marker_list))
579 return false;
580
581 list_del_init(&tr->marker_list);
582 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
583 return true;
584 }
585
586 void trace_set_ring_buffer_expanded(struct trace_array *tr)
587 {
588 if (!tr)
589 tr = &global_trace;
590 tr->ring_buffer_expanded = true;
591 }
592
593 LIST_HEAD(ftrace_trace_arrays);
594
595 int trace_array_get(struct trace_array *this_tr)
596 {
597 struct trace_array *tr;
598
599 guard(mutex)(&trace_types_lock);
600 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
601 if (tr == this_tr) {
602 tr->ref++;
603 return 0;
604 }
605 }
606
607 return -ENODEV;
608 }
609
610 static void __trace_array_put(struct trace_array *this_tr)
611 {
612 WARN_ON(!this_tr->ref);
613 this_tr->ref--;
614 }
615
616 /**
617 * trace_array_put - Decrement the reference counter for this trace array.
618 * @this_tr : pointer to the trace array
619 *
620 * NOTE: Use this when we no longer need the trace array returned by
621 * trace_array_get_by_name(). This ensures the trace array can be later
622 * destroyed.
623 *
624 */
625 void trace_array_put(struct trace_array *this_tr)
626 {
627 if (!this_tr)
628 return;
629
630 guard(mutex)(&trace_types_lock);
631 __trace_array_put(this_tr);
632 }
633 EXPORT_SYMBOL_GPL(trace_array_put);
634
635 int tracing_check_open_get_tr(struct trace_array *tr)
636 {
637 int ret;
638
639 ret = security_locked_down(LOCKDOWN_TRACEFS);
640 if (ret)
641 return ret;
642
643 if (tracing_disabled)
644 return -ENODEV;
645
646 if (tr && trace_array_get(tr) < 0)
647 return -ENODEV;
648
649 return 0;
650 }
651
652 /**
653 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
654 * @filtered_pids: The list of pids to check
655 * @search_pid: The PID to find in @filtered_pids
656 *
657 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
658 */
659 bool
660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
661 {
662 return trace_pid_list_is_set(filtered_pids, search_pid);
663 }
664
665 /**
666 * trace_ignore_this_task - should a task be ignored for tracing
667 * @filtered_pids: The list of pids to check
668 * @filtered_no_pids: The list of pids not to be traced
669 * @task: The task that should be ignored if not filtered
670 *
671 * Checks if @task should be traced or not from @filtered_pids.
672 * Returns true if @task should *NOT* be traced.
673 * Returns false if @task should be traced.
674 */
675 bool
676 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
677 struct trace_pid_list *filtered_no_pids,
678 struct task_struct *task)
679 {
680 /*
681 * If filtered_no_pids is not empty, and the task's pid is listed
682 * in filtered_no_pids, then return true.
683 * Otherwise, if filtered_pids is empty, that means we can
684 * trace all tasks. If it has content, then only trace pids
685 * within filtered_pids.
686 */
687
688 return (filtered_pids &&
689 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
690 (filtered_no_pids &&
691 trace_find_filtered_pid(filtered_no_pids, task->pid));
692 }
693
694 /**
695 * trace_filter_add_remove_task - Add or remove a task from a pid_list
696 * @pid_list: The list to modify
697 * @self: The current task for fork or NULL for exit
698 * @task: The task to add or remove
699 *
700 * When adding a task, if @self is defined, the task is only added if @self
701 * is also included in @pid_list. This happens on fork and tasks should
702 * only be added when the parent is listed. If @self is NULL, then the
703 * @task pid will be removed from the list, which would happen on exit
704 * of a task.
705 */
706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
707 struct task_struct *self,
708 struct task_struct *task)
709 {
710 if (!pid_list)
711 return;
712
713 /* For forks, we only add if the forking task is listed */
714 if (self) {
715 if (!trace_find_filtered_pid(pid_list, self->pid))
716 return;
717 }
718
719 /* "self" is set for forks, and NULL for exits */
720 if (self)
721 trace_pid_list_set(pid_list, task->pid);
722 else
723 trace_pid_list_clear(pid_list, task->pid);
724 }
725
726 /**
727 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
728 * @pid_list: The pid list to show
729 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
730 * @pos: The position of the file
731 *
732 * This is used by the seq_file "next" operation to iterate the pids
733 * listed in a trace_pid_list structure.
734 *
735 * Returns the pid+1 as we want to display pid of zero, but NULL would
736 * stop the iteration.
737 */
738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
739 {
740 long pid = (unsigned long)v;
741 unsigned int next;
742
743 (*pos)++;
744
745 /* pid already is +1 of the actual previous bit */
746 if (trace_pid_list_next(pid_list, pid, &next) < 0)
747 return NULL;
748
749 pid = next;
750
751 /* Return pid + 1 to allow zero to be represented */
752 return (void *)(pid + 1);
753 }
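/*
 * For example, if the pid list contains pid 0, trace_pid_next() returns
 * (void *)1 for it; trace_pid_show() subtracts the 1 again before printing.
 */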
754
755 /**
756 * trace_pid_start - Used for seq_file to start reading pid lists
757 * @pid_list: The pid list to show
758 * @pos: The position of the file
759 *
760 * This is used by seq_file "start" operation to start the iteration
761 * of listing pids.
762 *
763 * Returns the pid+1 as we want to display pid of zero, but NULL would
764 * stop the iteration.
765 */
766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
767 {
768 unsigned long pid;
769 unsigned int first;
770 loff_t l = 0;
771
772 if (trace_pid_list_first(pid_list, &first) < 0)
773 return NULL;
774
775 pid = first;
776
777 /* Return pid + 1 so that zero can be the exit value */
778 for (pid++; pid && l < *pos;
779 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
780 ;
781 return (void *)pid;
782 }
783
784 /**
785 * trace_pid_show - show the current pid in seq_file processing
786 * @m: The seq_file structure to write into
787 * @v: A void pointer of the pid (+1) value to display
788 *
789 * Can be directly used by seq_file operations to display the current
790 * pid value.
791 */
792 int trace_pid_show(struct seq_file *m, void *v)
793 {
794 unsigned long pid = (unsigned long)v - 1;
795
796 seq_printf(m, "%lu\n", pid);
797 return 0;
798 }
799
800 /* 128 should be much more than enough */
801 #define PID_BUF_SIZE 127
802
803 int trace_pid_write(struct trace_pid_list *filtered_pids,
804 struct trace_pid_list **new_pid_list,
805 const char __user *ubuf, size_t cnt)
806 {
807 struct trace_pid_list *pid_list;
808 struct trace_parser parser;
809 unsigned long val;
810 int nr_pids = 0;
811 ssize_t read = 0;
812 ssize_t ret;
813 loff_t pos;
814 pid_t pid;
815
816 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
817 return -ENOMEM;
818
819 /*
820 * Always create a new array: the write is an all-or-nothing
821 * operation, so a fresh array is built whenever the user adds
822 * new pids. If the operation fails, the current list is
823 * not modified.
824 */
825 pid_list = trace_pid_list_alloc();
826 if (!pid_list) {
827 trace_parser_put(&parser);
828 return -ENOMEM;
829 }
830
831 if (filtered_pids) {
832 /* copy the current bits to the new max */
833 ret = trace_pid_list_first(filtered_pids, &pid);
834 while (!ret) {
835 ret = trace_pid_list_set(pid_list, pid);
836 if (ret < 0)
837 goto out;
838
839 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
840 nr_pids++;
841 }
842 }
843
844 ret = 0;
845 while (cnt > 0) {
846
847 pos = 0;
848
849 ret = trace_get_user(&parser, ubuf, cnt, &pos);
850 if (ret < 0)
851 break;
852
853 read += ret;
854 ubuf += ret;
855 cnt -= ret;
856
857 if (!trace_parser_loaded(&parser))
858 break;
859
860 ret = -EINVAL;
861 if (kstrtoul(parser.buffer, 0, &val))
862 break;
863
864 pid = (pid_t)val;
865
866 if (trace_pid_list_set(pid_list, pid) < 0) {
867 ret = -1;
868 break;
869 }
870 nr_pids++;
871
872 trace_parser_clear(&parser);
873 ret = 0;
874 }
875 out:
876 trace_parser_put(&parser);
877
878 if (ret < 0) {
879 trace_pid_list_free(pid_list);
880 return ret;
881 }
882
883 if (!nr_pids) {
884 /* Cleared the list of pids */
885 trace_pid_list_free(pid_list);
886 pid_list = NULL;
887 }
888
889 *new_pid_list = pid_list;
890
891 return read;
892 }
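/*
 * This is the backend for the pid filter files in tracefs; e.g. a write
 * such as "echo 123 456 > set_event_pid" (illustrative) ends up here, with
 * the space-separated pids parsed one at a time by trace_get_user().
 */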
893
894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
895 {
896 u64 ts;
897
898 /* Early boot up does not have a buffer yet */
899 if (!buf->buffer)
900 return trace_clock_local();
901
902 ts = ring_buffer_time_stamp(buf->buffer);
903 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
904
905 return ts;
906 }
907
908 u64 ftrace_now(int cpu)
909 {
910 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
911 }
912
913 /**
914 * tracing_is_enabled - Show if global_trace has been enabled
915 *
916 * Shows if the global trace has been enabled or not. It uses the
917 * mirror flag "buffer_disabled", which is meant for fast paths such as
918 * the irqsoff tracer. But it may be inaccurate due to races. If you
919 * need to know the accurate state, use tracing_is_on() which is a little
920 * slower, but accurate.
921 */
922 int tracing_is_enabled(void)
923 {
924 /*
925 * For quick access (irqsoff uses this in fast path), just
926 * return the mirror variable of the state of the ring buffer.
927 * It's a little racy, but we don't really care.
928 */
929 return !global_trace.buffer_disabled;
930 }
931
932 /*
933 * trace_buf_size is the size in bytes that is allocated
934 * for a buffer. Note, the number of bytes is always rounded
935 * to page size.
936 *
937 * This number is purposely set to a low number of 16384.
938 * If a dump on oops happens, it is much appreciated not to
939 * have to wait for all that output. In any case, this is
940 * configurable at both boot time and run time.
941 */
942 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
943
944 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
945
946 /* trace_types holds a link list of available tracers. */
947 static struct tracer *trace_types __read_mostly;
948
949 /*
950 * trace_types_lock is used to protect the trace_types list.
951 */
952 DEFINE_MUTEX(trace_types_lock);
953
954 /*
955 * serialize the access of the ring buffer
956 *
957 * The ring buffer serializes readers, but that is only low level protection.
958 * The validity of the events (returned by ring_buffer_peek() etc.)
959 * is not protected by the ring buffer.
960 *
961 * The content of events may become garbage if we allow another process to
962 * consume these events concurrently:
963 * A) the page of the consumed events may become a normal page
964 * (not reader page) in ring buffer, and this page will be rewritten
965 * by events producer.
966 * B) The page of the consumed events may become a page for splice_read,
967 * and this page will be returned to system.
968 *
969 * These primitives allow multiple processes to access different cpu ring
970 * buffers concurrently.
971 *
972 * These primitives don't distinguish read-only and read-consume access.
973 * Multiple read-only accesses are also serialized.
974 */
975
976 #ifdef CONFIG_SMP
977 static DECLARE_RWSEM(all_cpu_access_lock);
978 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
979
980 static inline void trace_access_lock(int cpu)
981 {
982 if (cpu == RING_BUFFER_ALL_CPUS) {
983 /* gain it for accessing the whole ring buffer. */
984 down_write(&all_cpu_access_lock);
985 } else {
986 /* gain it for accessing a cpu ring buffer. */
987
988 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
989 down_read(&all_cpu_access_lock);
990
991 /* Secondly block other access to this @cpu ring buffer. */
992 mutex_lock(&per_cpu(cpu_access_lock, cpu));
993 }
994 }
995
996 static inline void trace_access_unlock(int cpu)
997 {
998 if (cpu == RING_BUFFER_ALL_CPUS) {
999 up_write(&all_cpu_access_lock);
1000 } else {
1001 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1002 up_read(&all_cpu_access_lock);
1003 }
1004 }
1005
1006 static inline void trace_access_lock_init(void)
1007 {
1008 int cpu;
1009
1010 for_each_possible_cpu(cpu)
1011 mutex_init(&per_cpu(cpu_access_lock, cpu));
1012 }
1013
1014 #else
1015
1016 static DEFINE_MUTEX(access_lock);
1017
1018 static inline void trace_access_lock(int cpu)
1019 {
1020 (void)cpu;
1021 mutex_lock(&access_lock);
1022 }
1023
1024 static inline void trace_access_unlock(int cpu)
1025 {
1026 (void)cpu;
1027 mutex_unlock(&access_lock);
1028 }
1029
1030 static inline void trace_access_lock_init(void)
1031 {
1032 }
1033
1034 #endif
1035
1036 #ifdef CONFIG_STACKTRACE
1037 static void __ftrace_trace_stack(struct trace_array *tr,
1038 struct trace_buffer *buffer,
1039 unsigned int trace_ctx,
1040 int skip, struct pt_regs *regs);
1041 static inline void ftrace_trace_stack(struct trace_array *tr,
1042 struct trace_buffer *buffer,
1043 unsigned int trace_ctx,
1044 int skip, struct pt_regs *regs);
1045
1046 #else
1047 static inline void __ftrace_trace_stack(struct trace_array *tr,
1048 struct trace_buffer *buffer,
1049 unsigned int trace_ctx,
1050 int skip, struct pt_regs *regs)
1051 {
1052 }
1053 static inline void ftrace_trace_stack(struct trace_array *tr,
1054 struct trace_buffer *buffer,
1055 unsigned long trace_ctx,
1056 int skip, struct pt_regs *regs)
1057 {
1058 }
1059
1060 #endif
1061
1062 static __always_inline void
1063 trace_event_setup(struct ring_buffer_event *event,
1064 int type, unsigned int trace_ctx)
1065 {
1066 struct trace_entry *ent = ring_buffer_event_data(event);
1067
1068 tracing_generic_entry_update(ent, type, trace_ctx);
1069 }
1070
1071 static __always_inline struct ring_buffer_event *
1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1073 int type,
1074 unsigned long len,
1075 unsigned int trace_ctx)
1076 {
1077 struct ring_buffer_event *event;
1078
1079 event = ring_buffer_lock_reserve(buffer, len);
1080 if (event != NULL)
1081 trace_event_setup(event, type, trace_ctx);
1082
1083 return event;
1084 }
1085
1086 void tracer_tracing_on(struct trace_array *tr)
1087 {
1088 if (tr->array_buffer.buffer)
1089 ring_buffer_record_on(tr->array_buffer.buffer);
1090 /*
1091 * This flag is looked at when buffers haven't been allocated
1092 * yet, or by some tracers (like irqsoff), that just want to
1093 * know if the ring buffer has been disabled, but it can handle
1094 * races of where it gets disabled but we still do a record.
1095 * As the check is in the fast path of the tracers, it is more
1096 * important to be fast than accurate.
1097 */
1098 tr->buffer_disabled = 0;
1099 }
1100
1101 /**
1102 * tracing_on - enable tracing buffers
1103 *
1104 * This function enables tracing buffers that may have been
1105 * disabled with tracing_off.
1106 */
1107 void tracing_on(void)
1108 {
1109 tracer_tracing_on(&global_trace);
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_on);
1112
1113
1114 static __always_inline void
1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1116 {
1117 __this_cpu_write(trace_taskinfo_save, true);
1118
1119 /* If this is the temp buffer, we need to commit fully */
1120 if (this_cpu_read(trace_buffered_event) == event) {
1121 /* Length is in event->array[0] */
1122 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1123 /* Release the temp buffer */
1124 this_cpu_dec(trace_buffered_event_cnt);
1125 /* ring_buffer_unlock_commit() enables preemption */
1126 preempt_enable_notrace();
1127 } else
1128 ring_buffer_unlock_commit(buffer);
1129 }
1130
1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1132 const char *str, int size)
1133 {
1134 struct ring_buffer_event *event;
1135 struct trace_buffer *buffer;
1136 struct print_entry *entry;
1137 unsigned int trace_ctx;
1138 int alloc;
1139
1140 if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1141 return 0;
1142
1143 if (unlikely(tracing_selftest_running && tr == &global_trace))
1144 return 0;
1145
1146 if (unlikely(tracing_disabled))
1147 return 0;
1148
1149 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1150
1151 trace_ctx = tracing_gen_ctx();
1152 buffer = tr->array_buffer.buffer;
1153 guard(ring_buffer_nest)(buffer);
1154 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1155 trace_ctx);
1156 if (!event)
1157 return 0;
1158
1159 entry = ring_buffer_event_data(event);
1160 entry->ip = ip;
1161
1162 memcpy(&entry->buf, str, size);
1163
1164 /* Add a newline if necessary */
1165 if (entry->buf[size - 1] != '\n') {
1166 entry->buf[size] = '\n';
1167 entry->buf[size + 1] = '\0';
1168 } else
1169 entry->buf[size] = '\0';
1170
1171 __buffer_unlock_commit(buffer, event);
1172 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 return size;
1174 }
1175 EXPORT_SYMBOL_GPL(__trace_array_puts);
1176
1177 /**
1178 * __trace_puts - write a constant string into the trace buffer.
1179 * @ip: The address of the caller
1180 * @str: The constant string to write
1181 * @size: The size of the string.
1182 */
1183 int __trace_puts(unsigned long ip, const char *str, int size)
1184 {
1185 return __trace_array_puts(printk_trace, ip, str, size);
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_puts);
1188
1189 /**
1190 * __trace_bputs - write the pointer to a constant string into trace buffer
1191 * @ip: The address of the caller
1192 * @str: The constant string to write to the buffer to
1193 */
1194 int __trace_bputs(unsigned long ip, const char *str)
1195 {
1196 struct trace_array *tr = READ_ONCE(printk_trace);
1197 struct ring_buffer_event *event;
1198 struct trace_buffer *buffer;
1199 struct bputs_entry *entry;
1200 unsigned int trace_ctx;
1201 int size = sizeof(struct bputs_entry);
1202
1203 if (!printk_binsafe(tr))
1204 return __trace_puts(ip, str, strlen(str));
1205
1206 if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1207 return 0;
1208
1209 if (unlikely(tracing_selftest_running || tracing_disabled))
1210 return 0;
1211
1212 trace_ctx = tracing_gen_ctx();
1213 buffer = tr->array_buffer.buffer;
1214
1215 guard(ring_buffer_nest)(buffer);
1216 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1217 trace_ctx);
1218 if (!event)
1219 return 0;
1220
1221 entry = ring_buffer_event_data(event);
1222 entry->ip = ip;
1223 entry->str = str;
1224
1225 __buffer_unlock_commit(buffer, event);
1226 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1227
1228 return 1;
1229 }
1230 EXPORT_SYMBOL_GPL(__trace_bputs);
1231
1232 #ifdef CONFIG_TRACER_SNAPSHOT
1233 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1234 void *cond_data)
1235 {
1236 struct tracer *tracer = tr->current_trace;
1237 unsigned long flags;
1238
1239 if (in_nmi()) {
1240 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1241 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1242 return;
1243 }
1244
1245 if (!tr->allocated_snapshot) {
1246 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1247 trace_array_puts(tr, "*** stopping trace here! ***\n");
1248 tracer_tracing_off(tr);
1249 return;
1250 }
1251
1252 /* Note, snapshot can not be used when the tracer uses it */
1253 if (tracer->use_max_tr) {
1254 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1255 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1256 return;
1257 }
1258
1259 if (tr->mapped) {
1260 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1261 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1262 return;
1263 }
1264
1265 local_irq_save(flags);
1266 update_max_tr(tr, current, smp_processor_id(), cond_data);
1267 local_irq_restore(flags);
1268 }
1269
1270 void tracing_snapshot_instance(struct trace_array *tr)
1271 {
1272 tracing_snapshot_instance_cond(tr, NULL);
1273 }
1274
1275 /**
1276 * tracing_snapshot - take a snapshot of the current buffer.
1277 *
1278 * This causes a swap between the snapshot buffer and the current live
1279 * tracing buffer. You can use this to take snapshots of the live
1280 * trace when some condition is triggered, but continue to trace.
1281 *
1282 * Note, make sure to allocate the snapshot with either
1283 * a tracing_snapshot_alloc(), or by doing it manually
1284 * with: echo 1 > /sys/kernel/tracing/snapshot
1285 *
1286 * If the snapshot buffer is not allocated, it will stop tracing.
1287 * Basically making a permanent snapshot.
1288 */
1289 void tracing_snapshot(void)
1290 {
1291 struct trace_array *tr = &global_trace;
1292
1293 tracing_snapshot_instance(tr);
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot);
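/*
 * Typical in-kernel usage sketch (illustrative only): allocate the
 * snapshot buffer once from a context that may sleep, then trigger
 * snapshots wherever the interesting condition is detected:
 *
 *	tracing_alloc_snapshot();	// or tracing_snapshot_alloc()
 *	...
 *	if (interesting_condition)	// made-up condition
 *		tracing_snapshot();
 */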
1296
1297 /**
1298 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1299 * @tr: The tracing instance to snapshot
1300 * @cond_data: The data to be tested conditionally, and possibly saved
1301 *
1302 * This is the same as tracing_snapshot() except that the snapshot is
1303 * conditional - the snapshot will only happen if the
1304 * cond_snapshot.update() implementation receiving the cond_data
1305 * returns true, which means that the trace array's cond_snapshot
1306 * update() operation used the cond_data to determine whether the
1307 * snapshot should be taken, and if it was, presumably saved it along
1308 * with the snapshot.
1309 */
1310 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1311 {
1312 tracing_snapshot_instance_cond(tr, cond_data);
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1315
1316 /**
1317 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1318 * @tr: The tracing instance
1319 *
1320 * When the user enables a conditional snapshot using
1321 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1322 * with the snapshot. This accessor is used to retrieve it.
1323 *
1324 * Should not be called from cond_snapshot.update(), since it takes
1325 * the tr->max_lock lock, which the code calling
1326 * cond_snapshot.update() has already taken.
1327 *
1328 * Returns the cond_data associated with the trace array's snapshot.
1329 */
1330 void *tracing_cond_snapshot_data(struct trace_array *tr)
1331 {
1332 void *cond_data = NULL;
1333
1334 local_irq_disable();
1335 arch_spin_lock(&tr->max_lock);
1336
1337 if (tr->cond_snapshot)
1338 cond_data = tr->cond_snapshot->cond_data;
1339
1340 arch_spin_unlock(&tr->max_lock);
1341 local_irq_enable();
1342
1343 return cond_data;
1344 }
1345 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1346
1347 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1348 struct array_buffer *size_buf, int cpu_id);
1349 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1350
1351 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1352 {
1353 int order;
1354 int ret;
1355
1356 if (!tr->allocated_snapshot) {
1357
1358 /* Make the snapshot buffer have the same order as main buffer */
1359 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1360 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1361 if (ret < 0)
1362 return ret;
1363
1364 /* allocate spare buffer */
1365 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1366 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1367 if (ret < 0)
1368 return ret;
1369
1370 tr->allocated_snapshot = true;
1371 }
1372
1373 return 0;
1374 }
1375
1376 static void free_snapshot(struct trace_array *tr)
1377 {
1378 /*
1379 * We don't free the ring buffer; instead, we resize it because
1380 * the max_tr ring buffer has some state (e.g. ring->clock) and
1381 * we want to preserve it.
1382 */
1383 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1384 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1385 set_buffer_entries(&tr->max_buffer, 1);
1386 tracing_reset_online_cpus(&tr->max_buffer);
1387 tr->allocated_snapshot = false;
1388 }
1389
1390 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1391 {
1392 int ret;
1393
1394 lockdep_assert_held(&trace_types_lock);
1395
1396 spin_lock(&tr->snapshot_trigger_lock);
1397 if (tr->snapshot == UINT_MAX || tr->mapped) {
1398 spin_unlock(&tr->snapshot_trigger_lock);
1399 return -EBUSY;
1400 }
1401
1402 tr->snapshot++;
1403 spin_unlock(&tr->snapshot_trigger_lock);
1404
1405 ret = tracing_alloc_snapshot_instance(tr);
1406 if (ret) {
1407 spin_lock(&tr->snapshot_trigger_lock);
1408 tr->snapshot--;
1409 spin_unlock(&tr->snapshot_trigger_lock);
1410 }
1411
1412 return ret;
1413 }
1414
1415 int tracing_arm_snapshot(struct trace_array *tr)
1416 {
1417 guard(mutex)(&trace_types_lock);
1418 return tracing_arm_snapshot_locked(tr);
1419 }
1420
1421 void tracing_disarm_snapshot(struct trace_array *tr)
1422 {
1423 spin_lock(&tr->snapshot_trigger_lock);
1424 if (!WARN_ON(!tr->snapshot))
1425 tr->snapshot--;
1426 spin_unlock(&tr->snapshot_trigger_lock);
1427 }
1428
1429 /**
1430 * tracing_alloc_snapshot - allocate snapshot buffer.
1431 *
1432 * This only allocates the snapshot buffer if it isn't already
1433 * allocated - it doesn't also take a snapshot.
1434 *
1435 * This is meant to be used in cases where the snapshot buffer needs
1436 * to be set up for events that can't sleep but need to be able to
1437 * trigger a snapshot.
1438 */
1439 int tracing_alloc_snapshot(void)
1440 {
1441 struct trace_array *tr = &global_trace;
1442 int ret;
1443
1444 ret = tracing_alloc_snapshot_instance(tr);
1445 WARN_ON(ret < 0);
1446
1447 return ret;
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450
1451 /**
1452 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1453 *
1454 * This is similar to tracing_snapshot(), but it will allocate the
1455 * snapshot buffer if it isn't already allocated. Use this only
1456 * where it is safe to sleep, as the allocation may sleep.
1457 *
1458 * This causes a swap between the snapshot buffer and the current live
1459 * tracing buffer. You can use this to take snapshots of the live
1460 * trace when some condition is triggered, but continue to trace.
1461 */
1462 void tracing_snapshot_alloc(void)
1463 {
1464 int ret;
1465
1466 ret = tracing_alloc_snapshot();
1467 if (ret < 0)
1468 return;
1469
1470 tracing_snapshot();
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1473
1474 /**
1475 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1476 * @tr: The tracing instance
1477 * @cond_data: User data to associate with the snapshot
1478 * @update: Implementation of the cond_snapshot update function
1479 *
1480 * Check whether the conditional snapshot for the given instance has
1481 * already been enabled, or if the current tracer is already using a
1482 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1483 * save the cond_data and update function inside.
1484 *
1485 * Returns 0 if successful, error otherwise.
1486 */
1487 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1488 cond_update_fn_t update)
1489 {
1490 struct cond_snapshot *cond_snapshot __free(kfree) =
1491 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1492 int ret;
1493
1494 if (!cond_snapshot)
1495 return -ENOMEM;
1496
1497 cond_snapshot->cond_data = cond_data;
1498 cond_snapshot->update = update;
1499
1500 guard(mutex)(&trace_types_lock);
1501
1502 if (tr->current_trace->use_max_tr)
1503 return -EBUSY;
1504
1505 /*
1506 * The cond_snapshot can only change to NULL without the
1507 * trace_types_lock. We don't care if we race with it going
1508 * to NULL, but we want to make sure that it's not set to
1509 * something other than NULL when we get here, which we can
1510 * do safely with only holding the trace_types_lock and not
1511 * having to take the max_lock.
1512 */
1513 if (tr->cond_snapshot)
1514 return -EBUSY;
1515
1516 ret = tracing_arm_snapshot_locked(tr);
1517 if (ret)
1518 return ret;
1519
1520 local_irq_disable();
1521 arch_spin_lock(&tr->max_lock);
1522 tr->cond_snapshot = no_free_ptr(cond_snapshot);
1523 arch_spin_unlock(&tr->max_lock);
1524 local_irq_enable();
1525
1526 return 0;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
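/*
 * Usage sketch (illustrative only; "my_update", "my_data" and the check
 * inside it are made up). The update callback decides whether a
 * conditional snapshot is actually taken:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return cond_data != NULL;	// snapshot only when data is set
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */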
1529
1530 /**
1531 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1532 * @tr: The tracing instance
1533 *
1534 * Check whether the conditional snapshot for the given instance is
1535 * enabled; if so, free the cond_snapshot associated with it,
1536 * otherwise return -EINVAL.
1537 *
1538 * Returns 0 if successful, error otherwise.
1539 */
1540 int tracing_snapshot_cond_disable(struct trace_array *tr)
1541 {
1542 int ret = 0;
1543
1544 local_irq_disable();
1545 arch_spin_lock(&tr->max_lock);
1546
1547 if (!tr->cond_snapshot)
1548 ret = -EINVAL;
1549 else {
1550 kfree(tr->cond_snapshot);
1551 tr->cond_snapshot = NULL;
1552 }
1553
1554 arch_spin_unlock(&tr->max_lock);
1555 local_irq_enable();
1556
1557 tracing_disarm_snapshot(tr);
1558
1559 return ret;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #else
1563 void tracing_snapshot(void)
1564 {
1565 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1566 }
1567 EXPORT_SYMBOL_GPL(tracing_snapshot);
1568 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1569 {
1570 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1571 }
1572 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1573 int tracing_alloc_snapshot(void)
1574 {
1575 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1576 return -ENODEV;
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1579 void tracing_snapshot_alloc(void)
1580 {
1581 /* Give warning */
1582 tracing_snapshot();
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1585 void *tracing_cond_snapshot_data(struct trace_array *tr)
1586 {
1587 return NULL;
1588 }
1589 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1590 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1591 {
1592 return -ENODEV;
1593 }
1594 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1595 int tracing_snapshot_cond_disable(struct trace_array *tr)
1596 {
1597 return false;
1598 }
1599 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1600 #define free_snapshot(tr) do { } while (0)
1601 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1602 #endif /* CONFIG_TRACER_SNAPSHOT */
1603
1604 void tracer_tracing_off(struct trace_array *tr)
1605 {
1606 if (tr->array_buffer.buffer)
1607 ring_buffer_record_off(tr->array_buffer.buffer);
1608 /*
1609 * This flag is looked at when buffers haven't been allocated
1610 * yet, or by some tracers (like irqsoff), that just want to
1611 * know if the ring buffer has been disabled, but it can handle
1612 * races of where it gets disabled but we still do a record.
1613 * As the check is in the fast path of the tracers, it is more
1614 * important to be fast than accurate.
1615 */
1616 tr->buffer_disabled = 1;
1617 }
1618
1619 /**
1620 * tracer_tracing_disable() - temporarily disable writes to the buffer
1621 * @tr: The trace array whose buffer is to be disabled
1622 *
1623 * Expects tracer_tracing_enable() to re-enable tracing.
1624 * The difference between this and tracer_tracing_off() is that this
1625 * is a counter and can nest, whereas tracer_tracing_off() can
1626 * be called multiple times and a single tracer_tracing_on() will
1627 * enable it.
1628 */
1629 void tracer_tracing_disable(struct trace_array *tr)
1630 {
1631 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1632 return;
1633
1634 ring_buffer_record_disable(tr->array_buffer.buffer);
1635 }
1636
1637 /**
1638 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1639 * @tr: The trace array that had tracer_tracing_disable() called on it
1640 *
1641 * This is called after tracer_tracing_disable() has been called on @tr,
1642 * when it's safe to re-enable tracing.
1643 */
1644 void tracer_tracing_enable(struct trace_array *tr)
1645 {
1646 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1647 return;
1648
1649 ring_buffer_record_enable(tr->array_buffer.buffer);
1650 }
1651
1652 /**
1653 * tracing_off - turn off tracing buffers
1654 *
1655 * This function stops the tracing buffers from recording data.
1656 * It does not disable any overhead the tracers themselves may
1657 * be causing. This function simply causes all recording to
1658 * the ring buffers to fail.
1659 */
1660 void tracing_off(void)
1661 {
1662 tracer_tracing_off(&global_trace);
1663 }
1664 EXPORT_SYMBOL_GPL(tracing_off);
1665
1666 void disable_trace_on_warning(void)
1667 {
1668 if (__disable_trace_on_warning) {
1669 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1670 "Disabling tracing due to warning\n");
1671 tracing_off();
1672 }
1673 }
1674
1675 /**
1676 * tracer_tracing_is_on - show real state of ring buffer enabled
1677 * @tr : the trace array to know if ring buffer is enabled
1678 *
1679 * Shows real state of the ring buffer if it is enabled or not.
1680 */
1681 bool tracer_tracing_is_on(struct trace_array *tr)
1682 {
1683 if (tr->array_buffer.buffer)
1684 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1685 return !tr->buffer_disabled;
1686 }
1687
1688 /**
1689 * tracing_is_on - show state of ring buffers enabled
1690 */
1691 int tracing_is_on(void)
1692 {
1693 return tracer_tracing_is_on(&global_trace);
1694 }
1695 EXPORT_SYMBOL_GPL(tracing_is_on);
1696
1697 static int __init set_buf_size(char *str)
1698 {
1699 unsigned long buf_size;
1700
1701 if (!str)
1702 return 0;
1703 buf_size = memparse(str, &str);
1704 /*
1705 * nr_entries can not be zero and the startup
1706 * tests require some buffer space. Therefore
1707 * ensure we have at least 4096 bytes of buffer.
1708 */
1709 trace_buf_size = max(4096UL, buf_size);
1710 return 1;
1711 }
1712 __setup("trace_buf_size=", set_buf_size);
1713
1714 static int __init set_tracing_thresh(char *str)
1715 {
1716 unsigned long threshold;
1717 int ret;
1718
1719 if (!str)
1720 return 0;
1721 ret = kstrtoul(str, 0, &threshold);
1722 if (ret < 0)
1723 return 0;
1724 tracing_thresh = threshold * 1000;
1725 return 1;
1726 }
1727 __setup("tracing_thresh=", set_tracing_thresh);
1728
1729 unsigned long nsecs_to_usecs(unsigned long nsecs)
1730 {
1731 return nsecs / 1000;
1732 }
1733
1734 /*
1735 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1736 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1737 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1738 * of strings in the order that the evals (enum) were defined.
1739 */
1740 #undef C
1741 #define C(a, b) b
1742
1743 /* These must match the bit positions in trace_iterator_flags */
1744 static const char *trace_options[] = {
1745 TRACE_FLAGS
1746 NULL
1747 };
1748
1749 static struct {
1750 u64 (*func)(void);
1751 const char *name;
1752 int in_ns; /* is this clock in nanoseconds? */
1753 } trace_clocks[] = {
1754 { trace_clock_local, "local", 1 },
1755 { trace_clock_global, "global", 1 },
1756 { trace_clock_counter, "counter", 0 },
1757 { trace_clock_jiffies, "uptime", 0 },
1758 { trace_clock, "perf", 1 },
1759 { ktime_get_mono_fast_ns, "mono", 1 },
1760 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1761 { ktime_get_boot_fast_ns, "boot", 1 },
1762 { ktime_get_tai_fast_ns, "tai", 1 },
1763 ARCH_TRACE_CLOCKS
1764 };
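/*
 * The active clock can be switched at run time, e.g. (illustrative):
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 *	cat /sys/kernel/tracing/trace_clock
 */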
1765
1766 bool trace_clock_in_ns(struct trace_array *tr)
1767 {
1768 if (trace_clocks[tr->clock_id].in_ns)
1769 return true;
1770
1771 return false;
1772 }
1773
1774 /*
1775 * trace_parser_get_init - gets the buffer for trace parser
1776 */
1777 int trace_parser_get_init(struct trace_parser *parser, int size)
1778 {
1779 memset(parser, 0, sizeof(*parser));
1780
1781 parser->buffer = kmalloc(size, GFP_KERNEL);
1782 if (!parser->buffer)
1783 return 1;
1784
1785 parser->size = size;
1786 return 0;
1787 }
1788
1789 /*
1790 * trace_parser_put - frees the buffer for trace parser
1791 */
1792 void trace_parser_put(struct trace_parser *parser)
1793 {
1794 kfree(parser->buffer);
1795 parser->buffer = NULL;
1796 }
1797
1798 /*
1799 * trace_get_user - reads the user input string separated by space
1800 * (matched by isspace(ch))
1801 *
1802 * For each string found the 'struct trace_parser' is updated,
1803 * and the function returns.
1804 *
1805 * Returns number of bytes read.
1806 *
1807 * See kernel/trace/trace.h for 'struct trace_parser' details.
1808 */
1809 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1810 size_t cnt, loff_t *ppos)
1811 {
1812 char ch;
1813 size_t read = 0;
1814 ssize_t ret;
1815
1816 if (!*ppos)
1817 trace_parser_clear(parser);
1818
1819 ret = get_user(ch, ubuf++);
1820 if (ret)
1821 goto fail;
1822
1823 read++;
1824 cnt--;
1825
1826 /*
1827 * The parser is not finished with the last write,
1828 * continue reading the user input without skipping spaces.
1829 */
1830 if (!parser->cont) {
1831 /* skip white space */
1832 while (cnt && isspace(ch)) {
1833 ret = get_user(ch, ubuf++);
1834 if (ret)
1835 goto fail;
1836 read++;
1837 cnt--;
1838 }
1839
1840 parser->idx = 0;
1841
1842 /* only spaces were written */
1843 if (isspace(ch) || !ch) {
1844 *ppos += read;
1845 return read;
1846 }
1847 }
1848
1849 /* read the non-space input */
1850 while (cnt && !isspace(ch) && ch) {
1851 if (parser->idx < parser->size - 1)
1852 parser->buffer[parser->idx++] = ch;
1853 else {
1854 ret = -EINVAL;
1855 goto fail;
1856 }
1857
1858 ret = get_user(ch, ubuf++);
1859 if (ret)
1860 goto fail;
1861 read++;
1862 cnt--;
1863 }
1864
1865 /* We either got finished input or we have to wait for another call. */
1866 if (isspace(ch) || !ch) {
1867 parser->buffer[parser->idx] = 0;
1868 parser->cont = false;
1869 } else if (parser->idx < parser->size - 1) {
1870 parser->cont = true;
1871 parser->buffer[parser->idx++] = ch;
1872 /* Make sure the parsed string always terminates with '\0'. */
1873 parser->buffer[parser->idx] = 0;
1874 } else {
1875 ret = -EINVAL;
1876 goto fail;
1877 }
1878
1879 *ppos += read;
1880 return read;
1881 fail:
1882 trace_parser_fail(parser);
1883 return ret;
1884 }
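/*
 * Hedged sketch of the usual calling pattern from a tracefs ->write()
 * handler; my_write() and my_handle_token() are hypothetical names:
 *
 *	static ssize_t my_write(struct file *filp, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			my_handle_token(parser.buffer);
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */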
1885
1886 /* TODO add a seq_buf_to_buffer() */
1887 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1888 {
1889 int len;
1890
1891 if (trace_seq_used(s) <= s->readpos)
1892 return -EBUSY;
1893
1894 len = trace_seq_used(s) - s->readpos;
1895 if (cnt > len)
1896 cnt = len;
1897 memcpy(buf, s->buffer + s->readpos, cnt);
1898
1899 s->readpos += cnt;
1900 return cnt;
1901 }
1902
1903 unsigned long __read_mostly tracing_thresh;
1904
1905 #ifdef CONFIG_TRACER_MAX_TRACE
1906 static const struct file_operations tracing_max_lat_fops;
1907
1908 #ifdef LATENCY_FS_NOTIFY
1909
1910 static struct workqueue_struct *fsnotify_wq;
1911
1912 static void latency_fsnotify_workfn(struct work_struct *work)
1913 {
1914 struct trace_array *tr = container_of(work, struct trace_array,
1915 fsnotify_work);
1916 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1917 }
1918
1919 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1920 {
1921 struct trace_array *tr = container_of(iwork, struct trace_array,
1922 fsnotify_irqwork);
1923 queue_work(fsnotify_wq, &tr->fsnotify_work);
1924 }
1925
1926 static void trace_create_maxlat_file(struct trace_array *tr,
1927 struct dentry *d_tracer)
1928 {
1929 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1930 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1931 tr->d_max_latency = trace_create_file("tracing_max_latency",
1932 TRACE_MODE_WRITE,
1933 d_tracer, tr,
1934 &tracing_max_lat_fops);
1935 }
1936
1937 __init static int latency_fsnotify_init(void)
1938 {
1939 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1940 WQ_UNBOUND | WQ_HIGHPRI, 0);
1941 if (!fsnotify_wq) {
1942 pr_err("Unable to allocate tr_max_lat_wq\n");
1943 return -ENOMEM;
1944 }
1945 return 0;
1946 }
1947
1948 late_initcall_sync(latency_fsnotify_init);
1949
1950 void latency_fsnotify(struct trace_array *tr)
1951 {
1952 if (!fsnotify_wq)
1953 return;
1954 /*
1955 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1956 * possible that we are called from __schedule() or do_idle(), which
1957 * could cause a deadlock.
1958 */
1959 irq_work_queue(&tr->fsnotify_irqwork);
1960 }
1961
1962 #else /* !LATENCY_FS_NOTIFY */
1963
1964 #define trace_create_maxlat_file(tr, d_tracer) \
1965 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1966 d_tracer, tr, &tracing_max_lat_fops)
1967
1968 #endif
1969
1970 /*
1971 * Copy the new maximum trace into the separate maximum-trace
1972 * structure. (this way the maximum trace is permanently saved,
1973 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1974 */
1975 static void
1976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1977 {
1978 struct array_buffer *trace_buf = &tr->array_buffer;
1979 struct array_buffer *max_buf = &tr->max_buffer;
1980 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1981 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1982
1983 max_buf->cpu = cpu;
1984 max_buf->time_start = data->preempt_timestamp;
1985
1986 max_data->saved_latency = tr->max_latency;
1987 max_data->critical_start = data->critical_start;
1988 max_data->critical_end = data->critical_end;
1989
1990 strscpy(max_data->comm, tsk->comm);
1991 max_data->pid = tsk->pid;
1992 /*
1993 * If tsk == current, then use current_uid(), as that does not use
1994 * RCU. The irq tracer can be called out of RCU scope.
1995 */
1996 if (tsk == current)
1997 max_data->uid = current_uid();
1998 else
1999 max_data->uid = task_uid(tsk);
2000
2001 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2002 max_data->policy = tsk->policy;
2003 max_data->rt_priority = tsk->rt_priority;
2004
2005 /* record this task's comm */
2006 tracing_record_cmdline(tsk);
2007 latency_fsnotify(tr);
2008 }
2009
2010 /**
2011 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2012 * @tr: tracer
2013 * @tsk: the task with the latency
2014 * @cpu: The cpu that initiated the trace.
2015 * @cond_data: User data associated with a conditional snapshot
2016 *
2017 * Flip the buffers between the @tr and the max_tr and record information
2018 * about which task was the cause of this latency.
2019 */
2020 void
2021 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2022 void *cond_data)
2023 {
2024 if (tr->stop_count)
2025 return;
2026
2027 WARN_ON_ONCE(!irqs_disabled());
2028
2029 if (!tr->allocated_snapshot) {
2030 /* Only the nop tracer should hit this when disabling */
2031 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2032 return;
2033 }
2034
2035 arch_spin_lock(&tr->max_lock);
2036
2037 /* Inherit the recordable setting from array_buffer */
2038 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2039 ring_buffer_record_on(tr->max_buffer.buffer);
2040 else
2041 ring_buffer_record_off(tr->max_buffer.buffer);
2042
2043 #ifdef CONFIG_TRACER_SNAPSHOT
2044 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2045 arch_spin_unlock(&tr->max_lock);
2046 return;
2047 }
2048 #endif
2049 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2050
2051 __update_max_tr(tr, tsk, cpu);
2052
2053 arch_spin_unlock(&tr->max_lock);
2054
2055 /* Any waiters on the old snapshot buffer need to wake up */
2056 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2057 }
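/*
 * Illustrative sketch of how a latency tracer typically drives this (the
 * variable names are generic, not copied from any particular tracer):
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */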
2058
2059 /**
2060 * update_max_tr_single - only copy one trace over, and reset the rest
2061 * @tr: tracer
2062 * @tsk: task with the latency
2063 * @cpu: the cpu of the buffer to copy.
2064 *
2065 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2066 */
2067 void
2068 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2069 {
2070 int ret;
2071
2072 if (tr->stop_count)
2073 return;
2074
2075 WARN_ON_ONCE(!irqs_disabled());
2076 if (!tr->allocated_snapshot) {
2077 /* Only the nop tracer should hit this when disabling */
2078 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2079 return;
2080 }
2081
2082 arch_spin_lock(&tr->max_lock);
2083
2084 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2085
2086 if (ret == -EBUSY) {
2087 /*
2088 * We failed to swap the buffer due to a commit taking
2089 * place on this CPU. We fail to record, but we reset
2090 * the max trace buffer (no one writes directly to it)
2091 * and flag that it failed.
2092 * Another reason is that a resize is in progress.
2093 */
2094 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2095 "Failed to swap buffers due to commit or resize in progress\n");
2096 }
2097
2098 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2099
2100 __update_max_tr(tr, tsk, cpu);
2101 arch_spin_unlock(&tr->max_lock);
2102 }
2103
2104 #endif /* CONFIG_TRACER_MAX_TRACE */
2105
2106 struct pipe_wait {
2107 struct trace_iterator *iter;
2108 int wait_index;
2109 };
2110
2111 static bool wait_pipe_cond(void *data)
2112 {
2113 struct pipe_wait *pwait = data;
2114 struct trace_iterator *iter = pwait->iter;
2115
2116 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2117 return true;
2118
2119 return iter->closed;
2120 }
2121
2122 static int wait_on_pipe(struct trace_iterator *iter, int full)
2123 {
2124 struct pipe_wait pwait;
2125 int ret;
2126
2127 /* Iterators are static, they should be filled or empty */
2128 if (trace_buffer_iter(iter, iter->cpu_file))
2129 return 0;
2130
2131 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2132 pwait.iter = iter;
2133
2134 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2135 wait_pipe_cond, &pwait);
2136
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 /*
2139 * Make sure this is still the snapshot buffer, as if a snapshot were
2140 * to happen, this would now be the main buffer.
2141 */
2142 if (iter->snapshot)
2143 iter->array_buffer = &iter->tr->max_buffer;
2144 #endif
2145 return ret;
2146 }
2147
2148 #ifdef CONFIG_FTRACE_STARTUP_TEST
2149 static bool selftests_can_run;
2150
2151 struct trace_selftests {
2152 struct list_head list;
2153 struct tracer *type;
2154 };
2155
2156 static LIST_HEAD(postponed_selftests);
2157
2158 static int save_selftest(struct tracer *type)
2159 {
2160 struct trace_selftests *selftest;
2161
2162 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2163 if (!selftest)
2164 return -ENOMEM;
2165
2166 selftest->type = type;
2167 list_add(&selftest->list, &postponed_selftests);
2168 return 0;
2169 }
2170
2171 static int run_tracer_selftest(struct tracer *type)
2172 {
2173 struct trace_array *tr = &global_trace;
2174 struct tracer_flags *saved_flags = tr->current_trace_flags;
2175 struct tracer *saved_tracer = tr->current_trace;
2176 int ret;
2177
2178 if (!type->selftest || tracing_selftest_disabled)
2179 return 0;
2180
2181 /*
2182 * If a tracer registers early in boot up (before scheduling is
2183 * initialized and such), then do not run its selftests yet.
2184 * Instead, run it a little later in the boot process.
2185 */
2186 if (!selftests_can_run)
2187 return save_selftest(type);
2188
2189 if (!tracing_is_on()) {
2190 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2191 type->name);
2192 return 0;
2193 }
2194
2195 /*
2196 * Run a selftest on this tracer.
2197 * Here we reset the trace buffer, and set the current
2198 * tracer to be this tracer. The tracer can then run some
2199 * internal tracing to verify that everything is in order.
2200 * If we fail, we do not register this tracer.
2201 */
2202 tracing_reset_online_cpus(&tr->array_buffer);
2203
2204 tr->current_trace = type;
2205 tr->current_trace_flags = type->flags ? : type->default_flags;
2206
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 if (type->use_max_tr) {
2209 /* If we expanded the buffers, make sure the max is expanded too */
2210 if (tr->ring_buffer_expanded)
2211 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 RING_BUFFER_ALL_CPUS);
2213 tr->allocated_snapshot = true;
2214 }
2215 #endif
2216
2217 /* the test is responsible for initializing and enabling */
2218 pr_info("Testing tracer %s: ", type->name);
2219 ret = type->selftest(type, tr);
2220 /* the test is responsible for resetting too */
2221 tr->current_trace = saved_tracer;
2222 tr->current_trace_flags = saved_flags;
2223 if (ret) {
2224 printk(KERN_CONT "FAILED!\n");
2225 /* Add the warning after printing 'FAILED' */
2226 WARN_ON(1);
2227 return -1;
2228 }
2229 /* Only reset on passing, to avoid touching corrupted buffers */
2230 tracing_reset_online_cpus(&tr->array_buffer);
2231
2232 #ifdef CONFIG_TRACER_MAX_TRACE
2233 if (type->use_max_tr) {
2234 tr->allocated_snapshot = false;
2235
2236 /* Shrink the max buffer again */
2237 if (tr->ring_buffer_expanded)
2238 ring_buffer_resize(tr->max_buffer.buffer, 1,
2239 RING_BUFFER_ALL_CPUS);
2240 }
2241 #endif
2242
2243 printk(KERN_CONT "PASSED\n");
2244 return 0;
2245 }
2246
2247 static int do_run_tracer_selftest(struct tracer *type)
2248 {
2249 int ret;
2250
2251 /*
2252 * Tests can take a long time, especially if they are run one after the
2253 * other, as does happen during bootup when all the tracers are
2254 * registered. This could cause the soft lockup watchdog to trigger.
2255 */
2256 cond_resched();
2257
2258 tracing_selftest_running = true;
2259 ret = run_tracer_selftest(type);
2260 tracing_selftest_running = false;
2261
2262 return ret;
2263 }
2264
2265 static __init int init_trace_selftests(void)
2266 {
2267 struct trace_selftests *p, *n;
2268 struct tracer *t, **last;
2269 int ret;
2270
2271 selftests_can_run = true;
2272
2273 guard(mutex)(&trace_types_lock);
2274
2275 if (list_empty(&postponed_selftests))
2276 return 0;
2277
2278 pr_info("Running postponed tracer tests:\n");
2279
2280 tracing_selftest_running = true;
2281 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2282 /* This loop can take minutes when sanitizers are enabled, so
2283 * let's make sure we allow RCU processing.
2284 */
2285 cond_resched();
2286 ret = run_tracer_selftest(p->type);
2287 /* If the test fails, then warn and remove from available_tracers */
2288 if (ret < 0) {
2289 WARN(1, "tracer: %s failed selftest, disabling\n",
2290 p->type->name);
2291 last = &trace_types;
2292 for (t = trace_types; t; t = t->next) {
2293 if (t == p->type) {
2294 *last = t->next;
2295 break;
2296 }
2297 last = &t->next;
2298 }
2299 }
2300 list_del(&p->list);
2301 kfree(p);
2302 }
2303 tracing_selftest_running = false;
2304
2305 return 0;
2306 }
2307 core_initcall(init_trace_selftests);
2308 #else
2309 static inline int do_run_tracer_selftest(struct tracer *type)
2310 {
2311 return 0;
2312 }
2313 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2314
2315 static int add_tracer(struct trace_array *tr, struct tracer *t);
2316
2317 static void __init apply_trace_boot_options(void);
2318
2319 static void free_tracers(struct trace_array *tr)
2320 {
2321 struct tracers *t, *n;
2322
2323 lockdep_assert_held(&trace_types_lock);
2324
2325 list_for_each_entry_safe(t, n, &tr->tracers, list) {
2326 list_del(&t->list);
2327 kfree(t->flags);
2328 kfree(t);
2329 }
2330 }
2331
2332 /**
2333 * register_tracer - register a tracer with the ftrace system.
2334 * @type: the plugin for the tracer
2335 *
2336 * Register a new plugin tracer.
2337 */
2338 int __init register_tracer(struct tracer *type)
2339 {
2340 struct trace_array *tr;
2341 struct tracer *t;
2342 int ret = 0;
2343
2344 if (!type->name) {
2345 pr_info("Tracer must have a name\n");
2346 return -1;
2347 }
2348
2349 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2350 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2351 return -1;
2352 }
2353
2354 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2355 pr_warn("Can not register tracer %s due to lockdown\n",
2356 type->name);
2357 return -EPERM;
2358 }
2359
2360 mutex_lock(&trace_types_lock);
2361
2362 for (t = trace_types; t; t = t->next) {
2363 if (strcmp(type->name, t->name) == 0) {
2364 /* already found */
2365 pr_info("Tracer %s already registered\n",
2366 type->name);
2367 ret = -1;
2368 goto out;
2369 }
2370 }
2371
2372 /* store the tracer for __set_tracer_option */
2373 if (type->flags)
2374 type->flags->trace = type;
2375
2376 ret = do_run_tracer_selftest(type);
2377 if (ret < 0)
2378 goto out;
2379
2380 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2381 ret = add_tracer(tr, type);
2382 if (ret < 0) {
2383 /* The tracer will still exist but without options */
2384 pr_warn("Failed to create tracer options for %s\n", type->name);
2385 break;
2386 }
2387 }
2388
2389 type->next = trace_types;
2390 trace_types = type;
2391
2392 out:
2393 mutex_unlock(&trace_types_lock);
2394
2395 if (ret || !default_bootup_tracer)
2396 return ret;
2397
2398 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2399 return 0;
2400
2401 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2402 /* Do we want this tracer to start on bootup? */
2403 WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2404 default_bootup_tracer = NULL;
2405
2406 apply_trace_boot_options();
2407
2408 /* disable other selftests, since this will break it. */
2409 disable_tracing_selftest("running a tracer");
2410
2411 return 0;
2412 }
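/*
 * Minimal registration sketch; the tracer below is hypothetical and a real
 * one would also provide ->reset and usually a ->selftest:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */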
2413
2414 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2415 {
2416 struct trace_buffer *buffer = buf->buffer;
2417
2418 if (!buffer)
2419 return;
2420
2421 ring_buffer_record_disable(buffer);
2422
2423 /* Make sure all commits have finished */
2424 synchronize_rcu();
2425 ring_buffer_reset_cpu(buffer, cpu);
2426
2427 ring_buffer_record_enable(buffer);
2428 }
2429
2430 void tracing_reset_online_cpus(struct array_buffer *buf)
2431 {
2432 struct trace_buffer *buffer = buf->buffer;
2433
2434 if (!buffer)
2435 return;
2436
2437 ring_buffer_record_disable(buffer);
2438
2439 /* Make sure all commits have finished */
2440 synchronize_rcu();
2441
2442 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443
2444 ring_buffer_reset_online_cpus(buffer);
2445
2446 ring_buffer_record_enable(buffer);
2447 }
2448
2449 static void tracing_reset_all_cpus(struct array_buffer *buf)
2450 {
2451 struct trace_buffer *buffer = buf->buffer;
2452
2453 if (!buffer)
2454 return;
2455
2456 ring_buffer_record_disable(buffer);
2457
2458 /* Make sure all commits have finished */
2459 synchronize_rcu();
2460
2461 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2462
2463 ring_buffer_reset(buffer);
2464
2465 ring_buffer_record_enable(buffer);
2466 }
2467
2468 /* Must have trace_types_lock held */
2469 void tracing_reset_all_online_cpus_unlocked(void)
2470 {
2471 struct trace_array *tr;
2472
2473 lockdep_assert_held(&trace_types_lock);
2474
2475 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2476 if (!tr->clear_trace)
2477 continue;
2478 tr->clear_trace = false;
2479 tracing_reset_online_cpus(&tr->array_buffer);
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481 tracing_reset_online_cpus(&tr->max_buffer);
2482 #endif
2483 }
2484 }
2485
2486 void tracing_reset_all_online_cpus(void)
2487 {
2488 guard(mutex)(&trace_types_lock);
2489 tracing_reset_all_online_cpus_unlocked();
2490 }
2491
2492 int is_tracing_stopped(void)
2493 {
2494 return global_trace.stop_count;
2495 }
2496
2497 static void tracing_start_tr(struct trace_array *tr)
2498 {
2499 struct trace_buffer *buffer;
2500
2501 if (tracing_disabled)
2502 return;
2503
2504 guard(raw_spinlock_irqsave)(&tr->start_lock);
2505 if (--tr->stop_count) {
2506 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2507 /* Someone screwed up their debugging */
2508 tr->stop_count = 0;
2509 }
2510 return;
2511 }
2512
2513 /* Prevent the buffers from switching */
2514 arch_spin_lock(&tr->max_lock);
2515
2516 buffer = tr->array_buffer.buffer;
2517 if (buffer)
2518 ring_buffer_record_enable(buffer);
2519
2520 #ifdef CONFIG_TRACER_MAX_TRACE
2521 buffer = tr->max_buffer.buffer;
2522 if (buffer)
2523 ring_buffer_record_enable(buffer);
2524 #endif
2525
2526 arch_spin_unlock(&tr->max_lock);
2527 }
2528
2529 /**
2530 * tracing_start - quick start of the tracer
2531 *
2532 * If tracing is enabled but was stopped by tracing_stop,
2533 * this will start the tracer back up.
2534 */
2535 void tracing_start(void)
2536
2537 {
2538 return tracing_start_tr(&global_trace);
2539 }
2540
2541 static void tracing_stop_tr(struct trace_array *tr)
2542 {
2543 struct trace_buffer *buffer;
2544
2545 guard(raw_spinlock_irqsave)(&tr->start_lock);
2546 if (tr->stop_count++)
2547 return;
2548
2549 /* Prevent the buffers from switching */
2550 arch_spin_lock(&tr->max_lock);
2551
2552 buffer = tr->array_buffer.buffer;
2553 if (buffer)
2554 ring_buffer_record_disable(buffer);
2555
2556 #ifdef CONFIG_TRACER_MAX_TRACE
2557 buffer = tr->max_buffer.buffer;
2558 if (buffer)
2559 ring_buffer_record_disable(buffer);
2560 #endif
2561
2562 arch_spin_unlock(&tr->max_lock);
2563 }
2564
2565 /**
2566 * tracing_stop - quick stop of the tracer
2567 *
2568 * Light weight way to stop tracing. Use in conjunction with
2569 * tracing_start.
2570 */
2571 void tracing_stop(void)
2572 {
2573 return tracing_stop_tr(&global_trace);
2574 }
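/*
 * Usage sketch: calls nest via tr->stop_count, so every tracing_stop() must
 * be paired with a tracing_start(); do_disruptive_work() is hypothetical.
 *
 *	tracing_stop();
 *	do_disruptive_work();
 *	tracing_start();
 */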
2575
2576 /*
2577 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579 * simplifies those functions and keeps them in sync.
2580 */
2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583 return trace_seq_has_overflowed(s) ?
2584 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
2587
2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591 return current->migration_disabled;
2592 #else
2593 return 0;
2594 #endif
2595 }
2596
2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599 unsigned int trace_flags = irqs_status;
2600 unsigned int pc;
2601
2602 pc = preempt_count();
2603
2604 if (pc & NMI_MASK)
2605 trace_flags |= TRACE_FLAG_NMI;
2606 if (pc & HARDIRQ_MASK)
2607 trace_flags |= TRACE_FLAG_HARDIRQ;
2608 if (in_serving_softirq())
2609 trace_flags |= TRACE_FLAG_SOFTIRQ;
2610 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611 trace_flags |= TRACE_FLAG_BH_OFF;
2612
2613 if (tif_need_resched())
2614 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615 if (test_preempt_need_resched())
2616 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2617 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2618 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2619 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2620 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2621 }
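/*
 * Layout sketch of the returned context word (derived from the expression
 * above): bits 0-3 carry the preempt count clamped to 15, bits 4-7 the
 * migrate-disable depth clamped to 15, and bits 16 and up the TRACE_FLAG_*
 * bits computed here plus those passed in via @irqs_status.
 */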
2622
2623 struct ring_buffer_event *
2624 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2625 int type,
2626 unsigned long len,
2627 unsigned int trace_ctx)
2628 {
2629 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2630 }
2631
2632 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2633 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2634 static int trace_buffered_event_ref;
2635
2636 /**
2637 * trace_buffered_event_enable - enable buffering events
2638 *
2639 * When events are being filtered, it is quicker to use a temporary
2640 * buffer to write the event data into if there's a likely chance
2641 * that it will not be committed. The discard of the ring buffer
2642 * is not as fast as committing, and is much slower than copying
2643 * a commit.
2644 *
2645 * When an event is to be filtered, allocate per cpu buffers to
2646 * write the event data into, and if the event is filtered and discarded
2647 * it is simply dropped, otherwise, the entire data is to be committed
2648 * in one shot.
2649 */
2650 void trace_buffered_event_enable(void)
2651 {
2652 struct ring_buffer_event *event;
2653 struct page *page;
2654 int cpu;
2655
2656 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2657
2658 if (trace_buffered_event_ref++)
2659 return;
2660
2661 for_each_tracing_cpu(cpu) {
2662 page = alloc_pages_node(cpu_to_node(cpu),
2663 GFP_KERNEL | __GFP_NORETRY, 0);
2664 /* This is just an optimization and can handle failures */
2665 if (!page) {
2666 pr_err("Failed to allocate event buffer\n");
2667 break;
2668 }
2669
2670 event = page_address(page);
2671 memset(event, 0, sizeof(*event));
2672
2673 per_cpu(trace_buffered_event, cpu) = event;
2674
2675 scoped_guard(preempt,) {
2676 if (cpu == smp_processor_id() &&
2677 __this_cpu_read(trace_buffered_event) !=
2678 per_cpu(trace_buffered_event, cpu))
2679 WARN_ON_ONCE(1);
2680 }
2681 }
2682 }
2683
2684 static void enable_trace_buffered_event(void *data)
2685 {
2686 this_cpu_dec(trace_buffered_event_cnt);
2687 }
2688
2689 static void disable_trace_buffered_event(void *data)
2690 {
2691 this_cpu_inc(trace_buffered_event_cnt);
2692 }
2693
2694 /**
2695 * trace_buffered_event_disable - disable buffering events
2696 *
2697 * When a filter is removed, it is faster to not use the buffered
2698 * events, and to commit directly into the ring buffer. Free up
2699 * the temp buffers when there are no more users. This requires
2700 * special synchronization with current events.
2701 */
2702 void trace_buffered_event_disable(void)
2703 {
2704 int cpu;
2705
2706 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707
2708 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2709 return;
2710
2711 if (--trace_buffered_event_ref)
2712 return;
2713
2714 /* For each CPU, set the buffer as used. */
2715 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2716 NULL, true);
2717
2718 /* Wait for all current users to finish */
2719 synchronize_rcu();
2720
2721 for_each_tracing_cpu(cpu) {
2722 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 per_cpu(trace_buffered_event, cpu) = NULL;
2724 }
2725
2726 /*
2727 * Wait for all CPUs that potentially started checking if they can use
2728 * their event buffer only after the previous synchronize_rcu() call and
2729 * they still read a valid pointer from trace_buffered_event. It must be
2730 * ensured they don't see cleared trace_buffered_event_cnt else they
2731 * could wrongly decide to use the pointed-to buffer which is now freed.
2732 */
2733 synchronize_rcu();
2734
2735 /* For each CPU, relinquish the buffer */
2736 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2737 true);
2738 }
2739
2740 static struct trace_buffer *temp_buffer;
2741
2742 struct ring_buffer_event *
2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2744 struct trace_event_file *trace_file,
2745 int type, unsigned long len,
2746 unsigned int trace_ctx)
2747 {
2748 struct ring_buffer_event *entry;
2749 struct trace_array *tr = trace_file->tr;
2750 int val;
2751
2752 *current_rb = tr->array_buffer.buffer;
2753
2754 if (!tr->no_filter_buffering_ref &&
2755 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2756 preempt_disable_notrace();
2757 /*
2758 * Filtering is on, so try to use the per cpu buffer first.
2759 * This buffer will simulate a ring_buffer_event,
2760 * where the type_len is zero and the array[0] will
2761 * hold the full length.
2762 * (see include/linux/ring_buffer.h for details on
2763 * how the ring_buffer_event is structured).
2764 *
2765 * Using a temp buffer during filtering and copying it
2766 * on a matched filter is quicker than writing directly
2767 * into the ring buffer and then discarding it when
2768 * it doesn't match. That is because the discard
2769 * requires several atomic operations to get right.
2770 * Copying on match and doing nothing on a failed match
2771 * is still quicker than no copy on match, but having
2772 * to discard out of the ring buffer on a failed match.
2773 */
2774 if ((entry = __this_cpu_read(trace_buffered_event))) {
2775 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2776
2777 val = this_cpu_inc_return(trace_buffered_event_cnt);
2778
2779 /*
2780 * Preemption is disabled, but interrupts and NMIs
2781 * can still come in now. If that happens after
2782 * the above increment, then it will have to go
2783 * back to the old method of allocating the event
2784 * on the ring buffer, and if the filter fails, it
2785 * will have to call ring_buffer_discard_commit()
2786 * to remove it.
2787 *
2788 * Need to also check the unlikely case that the
2789 * length is bigger than the temp buffer size.
2790 * If that happens, then the reserve is pretty much
2791 * guaranteed to fail, as the ring buffer currently
2792 * only allows events less than a page. But that may
2793 * change in the future, so let the ring buffer reserve
2794 * handle the failure in that case.
2795 */
2796 if (val == 1 && likely(len <= max_len)) {
2797 trace_event_setup(entry, type, trace_ctx);
2798 entry->array[0] = len;
2799 /* Return with preemption disabled */
2800 return entry;
2801 }
2802 this_cpu_dec(trace_buffered_event_cnt);
2803 }
2804 /* __trace_buffer_lock_reserve() disables preemption */
2805 preempt_enable_notrace();
2806 }
2807
2808 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2809 trace_ctx);
2810 /*
2811 * If tracing is off, but we have triggers enabled
2812 * we still need to look at the event data. Use the temp_buffer
2813 * to store the trace event for the trigger to use. It's recursion
2814 * safe and will not be recorded anywhere.
2815 */
2816 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2817 *current_rb = temp_buffer;
2818 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2819 trace_ctx);
2820 }
2821 return entry;
2822 }
2823 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2824
2825 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2826 static DEFINE_MUTEX(tracepoint_printk_mutex);
2827
2828 static void output_printk(struct trace_event_buffer *fbuffer)
2829 {
2830 struct trace_event_call *event_call;
2831 struct trace_event_file *file;
2832 struct trace_event *event;
2833 unsigned long flags;
2834 struct trace_iterator *iter = tracepoint_print_iter;
2835
2836 /* We should never get here if iter is NULL */
2837 if (WARN_ON_ONCE(!iter))
2838 return;
2839
2840 event_call = fbuffer->trace_file->event_call;
2841 if (!event_call || !event_call->event.funcs ||
2842 !event_call->event.funcs->trace)
2843 return;
2844
2845 file = fbuffer->trace_file;
2846 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2847 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2848 !filter_match_preds(file->filter, fbuffer->entry)))
2849 return;
2850
2851 event = &fbuffer->trace_file->event_call->event;
2852
2853 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2854 trace_seq_init(&iter->seq);
2855 iter->ent = fbuffer->entry;
2856 event_call->event.funcs->trace(iter, 0, event);
2857 trace_seq_putc(&iter->seq, 0);
2858 printk("%s", iter->seq.buffer);
2859
2860 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2861 }
2862
2863 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2864 void *buffer, size_t *lenp,
2865 loff_t *ppos)
2866 {
2867 int save_tracepoint_printk;
2868 int ret;
2869
2870 guard(mutex)(&tracepoint_printk_mutex);
2871 save_tracepoint_printk = tracepoint_printk;
2872
2873 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2874
2875 /*
2876 * This will force exiting early, as tracepoint_printk
2877 * is always zero when tracepoint_print_iter is not allocated
2878 */
2879 if (!tracepoint_print_iter)
2880 tracepoint_printk = 0;
2881
2882 if (save_tracepoint_printk == tracepoint_printk)
2883 return ret;
2884
2885 if (tracepoint_printk)
2886 static_key_enable(&tracepoint_printk_key.key);
2887 else
2888 static_key_disable(&tracepoint_printk_key.key);
2889
2890 return ret;
2891 }
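/*
 * Usage sketch (assuming the standard sysctl wiring): toggling
 * /proc/sys/kernel/tracepoint_printk ends up here, and it only has an
 * effect when the "tp_printk" boot option allocated tracepoint_print_iter;
 * otherwise the value is forced back to zero above.
 */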
2892
2893 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2894 {
2895 enum event_trigger_type tt = ETT_NONE;
2896 struct trace_event_file *file = fbuffer->trace_file;
2897
2898 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2899 fbuffer->entry, &tt))
2900 goto discard;
2901
2902 if (static_key_false(&tracepoint_printk_key.key))
2903 output_printk(fbuffer);
2904
2905 if (static_branch_unlikely(&trace_event_exports_enabled))
2906 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2907
2908 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2909 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2910
2911 discard:
2912 if (tt)
2913 event_triggers_post_call(file, tt);
2914
2915 }
2916 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2917
2918 /*
2919 * Skip 3:
2920 *
2921 * trace_buffer_unlock_commit_regs()
2922 * trace_event_buffer_commit()
2923 * trace_event_raw_event_xxx()
2924 */
2925 # define STACK_SKIP 3
2926
2927 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2928 struct trace_buffer *buffer,
2929 struct ring_buffer_event *event,
2930 unsigned int trace_ctx,
2931 struct pt_regs *regs)
2932 {
2933 __buffer_unlock_commit(buffer, event);
2934
2935 /*
2936 * If regs is not set, then skip the necessary functions.
2937 * Note, we can still get here via blktrace, wakeup tracer
2938 * and mmiotrace, but that's ok if they lose a function or
2939 * two. They are not that meaningful.
2940 */
2941 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2942 ftrace_trace_userstack(tr, buffer, trace_ctx);
2943 }
2944
2945 /*
2946 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2947 */
2948 void
2949 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2950 struct ring_buffer_event *event)
2951 {
2952 __buffer_unlock_commit(buffer, event);
2953 }
2954
2955 void
2956 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2957 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2958 {
2959 struct trace_buffer *buffer = tr->array_buffer.buffer;
2960 struct ring_buffer_event *event;
2961 struct ftrace_entry *entry;
2962 int size = sizeof(*entry);
2963
2964 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2965
2966 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2967 trace_ctx);
2968 if (!event)
2969 return;
2970 entry = ring_buffer_event_data(event);
2971 entry->ip = ip;
2972 entry->parent_ip = parent_ip;
2973
2974 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2975 if (fregs) {
2976 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2977 entry->args[i] = ftrace_regs_get_argument(fregs, i);
2978 }
2979 #endif
2980
2981 if (static_branch_unlikely(&trace_function_exports_enabled))
2982 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2983 __buffer_unlock_commit(buffer, event);
2984 }
2985
2986 #ifdef CONFIG_STACKTRACE
2987
2988 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2989 #define FTRACE_KSTACK_NESTING 4
2990
2991 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
2992
2993 struct ftrace_stack {
2994 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2995 };
2996
2997
2998 struct ftrace_stacks {
2999 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3000 };
3001
3002 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3003 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3004
3005 static void __ftrace_trace_stack(struct trace_array *tr,
3006 struct trace_buffer *buffer,
3007 unsigned int trace_ctx,
3008 int skip, struct pt_regs *regs)
3009 {
3010 struct ring_buffer_event *event;
3011 unsigned int size, nr_entries;
3012 struct ftrace_stack *fstack;
3013 struct stack_entry *entry;
3014 int stackidx;
3015
3016 /*
3017 * Add one, for this function and the call to save_stack_trace().
3018 * If regs is set, then these functions will not be in the way.
3019 */
3020 #ifndef CONFIG_UNWINDER_ORC
3021 if (!regs)
3022 skip++;
3023 #endif
3024
3025 guard(preempt_notrace)();
3026
3027 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3028
3029 /* This should never happen. If it does, yell once and skip */
3030 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3031 goto out;
3032
3033 /*
3034 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3035 * interrupt will either see the value pre increment or post
3036 * increment. If the interrupt happens pre increment it will have
3037 * restored the counter when it returns. We just need a barrier to
3038 * keep gcc from moving things around.
3039 */
3040 barrier();
3041
3042 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3043 size = ARRAY_SIZE(fstack->calls);
3044
3045 if (regs) {
3046 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3047 size, skip);
3048 } else {
3049 nr_entries = stack_trace_save(fstack->calls, size, skip);
3050 }
3051
3052 #ifdef CONFIG_DYNAMIC_FTRACE
3053 /* Mark entry of stack trace as trampoline code */
3054 if (tr->ops && tr->ops->trampoline) {
3055 unsigned long tramp_start = tr->ops->trampoline;
3056 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3057 unsigned long *calls = fstack->calls;
3058
3059 for (int i = 0; i < nr_entries; i++) {
3060 if (calls[i] >= tramp_start && calls[i] < tramp_end)
3061 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3062 }
3063 }
3064 #endif
3065
3066 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3067 struct_size(entry, caller, nr_entries),
3068 trace_ctx);
3069 if (!event)
3070 goto out;
3071 entry = ring_buffer_event_data(event);
3072
3073 entry->size = nr_entries;
3074 memcpy(&entry->caller, fstack->calls,
3075 flex_array_size(entry, caller, nr_entries));
3076
3077 __buffer_unlock_commit(buffer, event);
3078
3079 out:
3080 /* Again, don't let gcc optimize things here */
3081 barrier();
3082 __this_cpu_dec(ftrace_stack_reserve);
3083 }
3084
3085 static inline void ftrace_trace_stack(struct trace_array *tr,
3086 struct trace_buffer *buffer,
3087 unsigned int trace_ctx,
3088 int skip, struct pt_regs *regs)
3089 {
3090 if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
3091 return;
3092
3093 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3094 }
3095
3096 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3097 int skip)
3098 {
3099 struct trace_buffer *buffer = tr->array_buffer.buffer;
3100
3101 if (rcu_is_watching()) {
3102 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3103 return;
3104 }
3105
3106 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3107 return;
3108
3109 /*
3110 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3111 * but if the above rcu_is_watching() failed, then the NMI
3112 * triggered someplace critical, and ct_irq_enter() should
3113 * not be called from NMI.
3114 */
3115 if (unlikely(in_nmi()))
3116 return;
3117
3118 ct_irq_enter_irqson();
3119 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3120 ct_irq_exit_irqson();
3121 }
3122
3123 /**
3124 * trace_dump_stack - record a stack back trace in the trace buffer
3125 * @skip: Number of functions to skip (helper handlers)
3126 */
3127 void trace_dump_stack(int skip)
3128 {
3129 if (tracing_disabled || tracing_selftest_running)
3130 return;
3131
3132 #ifndef CONFIG_UNWINDER_ORC
3133 /* Skip 1 to skip this function. */
3134 skip++;
3135 #endif
3136 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3137 tracing_gen_ctx(), skip, NULL);
3138 }
3139 EXPORT_SYMBOL_GPL(trace_dump_stack);
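/*
 * Illustrative use from other kernel code; rare_condition() is a
 * hypothetical predicate:
 *
 *	if (unlikely(rare_condition()))
 *		trace_dump_stack(0);
 */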
3140
3141 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3142 static DEFINE_PER_CPU(int, user_stack_count);
3143
3144 static void
3145 ftrace_trace_userstack(struct trace_array *tr,
3146 struct trace_buffer *buffer, unsigned int trace_ctx)
3147 {
3148 struct ring_buffer_event *event;
3149 struct userstack_entry *entry;
3150
3151 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
3152 return;
3153
3154 /*
3155 * NMIs cannot handle page faults, even with fixups.
3156 * Saving the user stack can (and often does) fault.
3157 */
3158 if (unlikely(in_nmi()))
3159 return;
3160
3161 /*
3162 * prevent recursion, since the user stack tracing may
3163 * trigger other kernel events.
3164 */
3165 guard(preempt)();
3166 if (__this_cpu_read(user_stack_count))
3167 return;
3168
3169 __this_cpu_inc(user_stack_count);
3170
3171 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3172 sizeof(*entry), trace_ctx);
3173 if (!event)
3174 goto out_drop_count;
3175 entry = ring_buffer_event_data(event);
3176
3177 entry->tgid = current->tgid;
3178 memset(&entry->caller, 0, sizeof(entry->caller));
3179
3180 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3181 __buffer_unlock_commit(buffer, event);
3182
3183 out_drop_count:
3184 __this_cpu_dec(user_stack_count);
3185 }
3186 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3187 static void ftrace_trace_userstack(struct trace_array *tr,
3188 struct trace_buffer *buffer,
3189 unsigned int trace_ctx)
3190 {
3191 }
3192 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3193
3194 #endif /* CONFIG_STACKTRACE */
3195
3196 static inline void
3197 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3198 unsigned long long delta)
3199 {
3200 entry->bottom_delta_ts = delta & U32_MAX;
3201 entry->top_delta_ts = (delta >> 32);
3202 }
3203
3204 void trace_last_func_repeats(struct trace_array *tr,
3205 struct trace_func_repeats *last_info,
3206 unsigned int trace_ctx)
3207 {
3208 struct trace_buffer *buffer = tr->array_buffer.buffer;
3209 struct func_repeats_entry *entry;
3210 struct ring_buffer_event *event;
3211 u64 delta;
3212
3213 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3214 sizeof(*entry), trace_ctx);
3215 if (!event)
3216 return;
3217
3218 delta = ring_buffer_event_time_stamp(buffer, event) -
3219 last_info->ts_last_call;
3220
3221 entry = ring_buffer_event_data(event);
3222 entry->ip = last_info->ip;
3223 entry->parent_ip = last_info->parent_ip;
3224 entry->count = last_info->count;
3225 func_repeats_set_delta_ts(entry, delta);
3226
3227 __buffer_unlock_commit(buffer, event);
3228 }
3229
3230 /* created for use with alloc_percpu */
3231 struct trace_buffer_struct {
3232 int nesting;
3233 char buffer[4][TRACE_BUF_SIZE];
3234 };
3235
3236 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3237
3238 /*
3239 * This allows for lockless recording. If we're nested too deeply, then
3240 * this returns NULL.
3241 */
3242 static char *get_trace_buf(void)
3243 {
3244 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3245
3246 if (!trace_percpu_buffer || buffer->nesting >= 4)
3247 return NULL;
3248
3249 buffer->nesting++;
3250
3251 /* Interrupts must see nesting incremented before we use the buffer */
3252 barrier();
3253 return &buffer->buffer[buffer->nesting - 1][0];
3254 }
3255
3256 static void put_trace_buf(void)
3257 {
3258 /* Don't let the decrement of nesting leak before this */
3259 barrier();
3260 this_cpu_dec(trace_percpu_buffer->nesting);
3261 }
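/*
 * Sketch of the expected pairing, mirroring what trace_vbprintk() and
 * __trace_array_vprintk() below do:
 *
 *	char *tbuffer = get_trace_buf();
 *
 *	if (tbuffer) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 */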
3262
3263 static int alloc_percpu_trace_buffer(void)
3264 {
3265 struct trace_buffer_struct __percpu *buffers;
3266
3267 if (trace_percpu_buffer)
3268 return 0;
3269
3270 buffers = alloc_percpu(struct trace_buffer_struct);
3271 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3272 return -ENOMEM;
3273
3274 trace_percpu_buffer = buffers;
3275 return 0;
3276 }
3277
3278 static int buffers_allocated;
3279
3280 void trace_printk_init_buffers(void)
3281 {
3282 if (buffers_allocated)
3283 return;
3284
3285 if (alloc_percpu_trace_buffer())
3286 return;
3287
3288 /* trace_printk() is for debug use only. Don't use it in production. */
3289
3290 pr_warn("\n");
3291 pr_warn("**********************************************************\n");
3292 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3293 pr_warn("** **\n");
3294 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3295 pr_warn("** **\n");
3296 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3297 pr_warn("** unsafe for production use. **\n");
3298 pr_warn("** **\n");
3299 pr_warn("** If you see this message and you are not debugging **\n");
3300 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3301 pr_warn("** **\n");
3302 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3303 pr_warn("**********************************************************\n");
3304
3305 /* Expand the buffers to set size */
3306 tracing_update_buffers(&global_trace);
3307
3308 buffers_allocated = 1;
3309
3310 /*
3311 * trace_printk_init_buffers() can be called by modules.
3312 * If that happens, then we need to start cmdline recording
3313 * directly here. If the global_trace.buffer is already
3314 * allocated here, then this was called by module code.
3315 */
3316 if (global_trace.array_buffer.buffer)
3317 tracing_start_cmdline_record();
3318 }
3319 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3320
3321 void trace_printk_start_comm(void)
3322 {
3323 /* Start tracing comms if trace printk is set */
3324 if (!buffers_allocated)
3325 return;
3326 tracing_start_cmdline_record();
3327 }
3328
3329 static void trace_printk_start_stop_comm(int enabled)
3330 {
3331 if (!buffers_allocated)
3332 return;
3333
3334 if (enabled)
3335 tracing_start_cmdline_record();
3336 else
3337 tracing_stop_cmdline_record();
3338 }
3339
3340 /**
3341 * trace_vbprintk - write binary msg to tracing buffer
3342 * @ip: The address of the caller
3343 * @fmt: The string format to write to the buffer
3344 * @args: Arguments for @fmt
3345 */
3346 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3347 {
3348 struct ring_buffer_event *event;
3349 struct trace_buffer *buffer;
3350 struct trace_array *tr = READ_ONCE(printk_trace);
3351 struct bprint_entry *entry;
3352 unsigned int trace_ctx;
3353 char *tbuffer;
3354 int len = 0, size;
3355
3356 if (!printk_binsafe(tr))
3357 return trace_vprintk(ip, fmt, args);
3358
3359 if (unlikely(tracing_selftest_running || tracing_disabled))
3360 return 0;
3361
3362 /* Don't pollute graph traces with trace_vprintk internals */
3363 pause_graph_tracing();
3364
3365 trace_ctx = tracing_gen_ctx();
3366 guard(preempt_notrace)();
3367
3368 tbuffer = get_trace_buf();
3369 if (!tbuffer) {
3370 len = 0;
3371 goto out_nobuffer;
3372 }
3373
3374 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375
3376 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377 goto out_put;
3378
3379 size = sizeof(*entry) + sizeof(u32) * len;
3380 buffer = tr->array_buffer.buffer;
3381 scoped_guard(ring_buffer_nest, buffer) {
3382 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383 trace_ctx);
3384 if (!event)
3385 goto out_put;
3386 entry = ring_buffer_event_data(event);
3387 entry->ip = ip;
3388 entry->fmt = fmt;
3389
3390 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391 __buffer_unlock_commit(buffer, event);
3392 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3393 }
3394 out_put:
3395 put_trace_buf();
3396
3397 out_nobuffer:
3398 unpause_graph_tracing();
3399
3400 return len;
3401 }
3402 EXPORT_SYMBOL_GPL(trace_vbprintk);
3403
3404 static __printf(3, 0)
3405 int __trace_array_vprintk(struct trace_buffer *buffer,
3406 unsigned long ip, const char *fmt, va_list args)
3407 {
3408 struct ring_buffer_event *event;
3409 int len = 0, size;
3410 struct print_entry *entry;
3411 unsigned int trace_ctx;
3412 char *tbuffer;
3413
3414 if (tracing_disabled)
3415 return 0;
3416
3417 /* Don't pollute graph traces with trace_vprintk internals */
3418 pause_graph_tracing();
3419
3420 trace_ctx = tracing_gen_ctx();
3421 guard(preempt_notrace)();
3422
3423
3424 tbuffer = get_trace_buf();
3425 if (!tbuffer) {
3426 len = 0;
3427 goto out_nobuffer;
3428 }
3429
3430 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3431
3432 size = sizeof(*entry) + len + 1;
3433 scoped_guard(ring_buffer_nest, buffer) {
3434 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3435 trace_ctx);
3436 if (!event)
3437 goto out;
3438 entry = ring_buffer_event_data(event);
3439 entry->ip = ip;
3440
3441 memcpy(&entry->buf, tbuffer, len + 1);
3442 __buffer_unlock_commit(buffer, event);
3443 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3444 }
3445 out:
3446 put_trace_buf();
3447
3448 out_nobuffer:
3449 unpause_graph_tracing();
3450
3451 return len;
3452 }
3453
3454 int trace_array_vprintk(struct trace_array *tr,
3455 unsigned long ip, const char *fmt, va_list args)
3456 {
3457 if (tracing_selftest_running && tr == &global_trace)
3458 return 0;
3459
3460 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3461 }
3462
3463 /**
3464 * trace_array_printk - Print a message to a specific instance
3465 * @tr: The instance trace_array descriptor
3466 * @ip: The instruction pointer that this is called from.
3467 * @fmt: The format to print (printf format)
3468 *
3469 * If a subsystem sets up its own instance, they have the right to
3470 * printk strings into their tracing instance buffer using this
3471 * function. Note, this function will not write into the top level
3472 * buffer (use trace_printk() for that), as writing into the top level
3473 * buffer should only have events that can be individually disabled.
3474 * trace_printk() is only used for debugging a kernel, and should not
3475 * be ever incorporated in normal use.
3476 *
3477 * trace_array_printk() can be used, as it will not add noise to the
3478 * top level tracing buffer.
3479 *
3480 * Note, trace_array_init_printk() must be called on @tr before this
3481 * can be used.
3482 */
3483 int trace_array_printk(struct trace_array *tr,
3484 unsigned long ip, const char *fmt, ...)
3485 {
3486 int ret;
3487 va_list ap;
3488
3489 if (!tr)
3490 return -ENOENT;
3491
3492 /* This is only allowed for created instances */
3493 if (tr == &global_trace)
3494 return 0;
3495
3496 if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
3497 return 0;
3498
3499 va_start(ap, fmt);
3500 ret = trace_array_vprintk(tr, ip, fmt, ap);
3501 va_end(ap);
3502 return ret;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_array_printk);
3505
3506 /**
3507 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3508 * @tr: The trace array to initialize the buffers for
3509 *
3510 * As trace_array_printk() only writes into instances, they are OK to
3511 * have in the kernel (unlike trace_printk()). This needs to be called
3512 * before trace_array_printk() can be used on a trace_array.
3513 */
3514 int trace_array_init_printk(struct trace_array *tr)
3515 {
3516 if (!tr)
3517 return -ENOENT;
3518
3519 /* This is only allowed for created instances */
3520 if (tr == &global_trace)
3521 return -EINVAL;
3522
3523 return alloc_percpu_trace_buffer();
3524 }
3525 EXPORT_SYMBOL_GPL(trace_array_init_printk);
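/*
 * Minimal usage sketch for a subsystem-owned instance; how @tr is obtained
 * (e.g. via trace_array_get_by_name()) and the message are illustrative only:
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */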
3526
3527 int trace_array_printk_buf(struct trace_buffer *buffer,
3528 unsigned long ip, const char *fmt, ...)
3529 {
3530 int ret;
3531 va_list ap;
3532
3533 if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
3534 return 0;
3535
3536 va_start(ap, fmt);
3537 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3538 va_end(ap);
3539 return ret;
3540 }
3541
3542 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3543 {
3544 return trace_array_vprintk(printk_trace, ip, fmt, args);
3545 }
3546 EXPORT_SYMBOL_GPL(trace_vprintk);
3547
3548 static void trace_iterator_increment(struct trace_iterator *iter)
3549 {
3550 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3551
3552 iter->idx++;
3553 if (buf_iter)
3554 ring_buffer_iter_advance(buf_iter);
3555 }
3556
3557 static struct trace_entry *
3558 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3559 unsigned long *lost_events)
3560 {
3561 struct ring_buffer_event *event;
3562 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3563
3564 if (buf_iter) {
3565 event = ring_buffer_iter_peek(buf_iter, ts);
3566 if (lost_events)
3567 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3568 (unsigned long)-1 : 0;
3569 } else {
3570 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3571 lost_events);
3572 }
3573
3574 if (event) {
3575 iter->ent_size = ring_buffer_event_length(event);
3576 return ring_buffer_event_data(event);
3577 }
3578 iter->ent_size = 0;
3579 return NULL;
3580 }
3581
3582 static struct trace_entry *
3583 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3584 unsigned long *missing_events, u64 *ent_ts)
3585 {
3586 struct trace_buffer *buffer = iter->array_buffer->buffer;
3587 struct trace_entry *ent, *next = NULL;
3588 unsigned long lost_events = 0, next_lost = 0;
3589 int cpu_file = iter->cpu_file;
3590 u64 next_ts = 0, ts;
3591 int next_cpu = -1;
3592 int next_size = 0;
3593 int cpu;
3594
3595 /*
3596 * If we are in a per_cpu trace file, don't bother iterating over
3597 * all CPUs; just peek at that CPU directly.
3598 */
3599 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3600 if (ring_buffer_empty_cpu(buffer, cpu_file))
3601 return NULL;
3602 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3603 if (ent_cpu)
3604 *ent_cpu = cpu_file;
3605
3606 return ent;
3607 }
3608
3609 for_each_tracing_cpu(cpu) {
3610
3611 if (ring_buffer_empty_cpu(buffer, cpu))
3612 continue;
3613
3614 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3615
3616 /*
3617 * Pick the entry with the smallest timestamp:
3618 */
3619 if (ent && (!next || ts < next_ts)) {
3620 next = ent;
3621 next_cpu = cpu;
3622 next_ts = ts;
3623 next_lost = lost_events;
3624 next_size = iter->ent_size;
3625 }
3626 }
3627
3628 iter->ent_size = next_size;
3629
3630 if (ent_cpu)
3631 *ent_cpu = next_cpu;
3632
3633 if (ent_ts)
3634 *ent_ts = next_ts;
3635
3636 if (missing_events)
3637 *missing_events = next_lost;
3638
3639 return next;
3640 }
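/*
 * Example (illustrative): if the next pending entries have timestamps
 * 105 on CPU0, 90 on CPU1 and 120 on CPU2, the CPU1 entry is returned,
 * *ent_cpu is set to 1 and iter->ent_size is set for that entry. This
 * keeps the merged view of the trace time ordered even though each CPU
 * writes to its own ring buffer.
 */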
3641
3642 #define STATIC_FMT_BUF_SIZE 128
3643 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3644
3645 char *trace_iter_expand_format(struct trace_iterator *iter)
3646 {
3647 char *tmp;
3648
3649 /*
3650 * iter->tr is NULL when used with tp_printk, which means this
3651 * can get called where it is not safe to call krealloc().
3652 */
3653 if (!iter->tr || iter->fmt == static_fmt_buf)
3654 return NULL;
3655
3656 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3657 GFP_KERNEL);
3658 if (tmp) {
3659 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3660 iter->fmt = tmp;
3661 }
3662
3663 return tmp;
3664 }
3665
3666 /* Returns true if the string is safe to dereference from an event */
3667 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3668 {
3669 unsigned long addr = (unsigned long)str;
3670 struct trace_event *trace_event;
3671 struct trace_event_call *event;
3672
3673 /* OK if part of the event data */
3674 if ((addr >= (unsigned long)iter->ent) &&
3675 (addr < (unsigned long)iter->ent + iter->ent_size))
3676 return true;
3677
3678 /* OK if part of the temp seq buffer */
3679 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3680 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3681 return true;
3682
3683 /* Core rodata cannot be freed */
3684 if (is_kernel_rodata(addr))
3685 return true;
3686
3687 if (trace_is_tracepoint_string(str))
3688 return true;
3689
3690 /*
3691 * Now this could be a module event, referencing core module
3692 * data, which is OK.
3693 */
3694 if (!iter->ent)
3695 return false;
3696
3697 trace_event = ftrace_find_event(iter->ent->type);
3698 if (!trace_event)
3699 return false;
3700
3701 event = container_of(trace_event, struct trace_event_call, event);
3702 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3703 return false;
3704
3705 /* Would rather have rodata, but this will suffice */
3706 if (within_module_core(addr, event->module))
3707 return true;
3708
3709 return false;
3710 }
3711
3712 /**
3713 * ignore_event - Check dereferenced fields while writing to the seq buffer
3714 * @iter: The iterator that holds the seq buffer and the event being printed
3715 *
3716 * At boot up, test_event_printk() will flag any event that dereferences
3717 * a string with "%s" that does not exist in the ring buffer. It may still
3718 * be valid, as the string may point to a static string in the kernel
3719 * rodata that never gets freed. But if the string pointer is pointing
3720 * to something that was allocated, there's a chance that it can be freed
3721 * by the time the user reads the trace. This would cause a bad memory
3722 * access by the kernel and possibly crash the system.
3723 *
3724 * This function will check if the event has any fields flagged as needing
3725 * to be checked at runtime and perform those checks.
3726 *
3727 * If it is found that a field is unsafe, it will write into the @iter->seq
3728 * a message stating what was found to be unsafe.
3729 *
3730 * @return: true if the event is unsafe and should be ignored,
3731 * false otherwise.
3732 */
3733 bool ignore_event(struct trace_iterator *iter)
3734 {
3735 struct ftrace_event_field *field;
3736 struct trace_event *trace_event;
3737 struct trace_event_call *event;
3738 struct list_head *head;
3739 struct trace_seq *seq;
3740 const void *ptr;
3741
3742 trace_event = ftrace_find_event(iter->ent->type);
3743
3744 seq = &iter->seq;
3745
3746 if (!trace_event) {
3747 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3748 return true;
3749 }
3750
3751 event = container_of(trace_event, struct trace_event_call, event);
3752 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3753 return false;
3754
3755 head = trace_get_fields(event);
3756 if (!head) {
3757 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3758 trace_event_name(event));
3759 return true;
3760 }
3761
3762 /* Offsets are from the iter->ent that points to the raw event */
3763 ptr = iter->ent;
3764
3765 list_for_each_entry(field, head, link) {
3766 const char *str;
3767 bool good;
3768
3769 if (!field->needs_test)
3770 continue;
3771
3772 str = *(const char **)(ptr + field->offset);
3773
3774 good = trace_safe_str(iter, str);
3775
3776 /*
3777 * If you hit this warning, it is likely that the
3778 * trace event in question used %s on a string that
3779 * was saved at the time of the event, but may not be
3780 * around when the trace is read. Use __string(),
3781 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3782 * instead. See samples/trace_events/trace-events-sample.h
3783 * for reference.
3784 */
3785 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3786 trace_event_name(event), field->name)) {
3787 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3788 trace_event_name(event), field->name);
3789 return true;
3790 }
3791 }
3792 return false;
3793 }
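/*
 * Illustrative sketch of the safe pattern that the warning above points
 * to (the event and field names here are made up; see
 * samples/trace_events/trace-events-sample.h for the authoritative
 * reference). Instead of recording a bare "char *" and printing it later
 * with "%s", copy the string into the event itself:
 *
 *	TRACE_EVENT(my_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * Note that older kernels expect a second argument to __assign_str().
 */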
3794
3795 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3796 {
3797 const char *p, *new_fmt;
3798 char *q;
3799
3800 if (WARN_ON_ONCE(!fmt))
3801 return fmt;
3802
3803 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3804 return fmt;
3805
3806 p = fmt;
3807 new_fmt = q = iter->fmt;
3808 while (*p) {
3809 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3810 if (!trace_iter_expand_format(iter))
3811 return fmt;
3812
3813 q += iter->fmt - new_fmt;
3814 new_fmt = iter->fmt;
3815 }
3816
3817 *q++ = *p++;
3818
3819 /* Replace %p with %px */
3820 if (p[-1] == '%') {
3821 if (p[0] == '%') {
3822 *q++ = *p++;
3823 } else if (p[0] == 'p' && !isalnum(p[1])) {
3824 *q++ = *p++;
3825 *q++ = 'x';
3826 }
3827 }
3828 }
3829 *q = '\0';
3830
3831 return new_fmt;
3832 }
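/*
 * For example (illustrative): with the hash-ptr option cleared, a
 * print_fmt of "comm=%s ptr=%p pct=100%%" is rewritten in iter->fmt as
 * "comm=%s ptr=%px pct=100%%". A literal "%%" is copied through
 * unchanged and only a bare "%p" gains the 'x'; extended forms such as
 * "%ps" or "%pK" are left alone because the following character is
 * alphanumeric.
 */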
3833
3834 #define STATIC_TEMP_BUF_SIZE 128
3835 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3836
3837 /* Find the next real entry, without updating the iterator itself */
3838 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3839 int *ent_cpu, u64 *ent_ts)
3840 {
3841 /* __find_next_entry will reset ent_size */
3842 int ent_size = iter->ent_size;
3843 struct trace_entry *entry;
3844
3845 /*
3846 * If called from ftrace_dump(), then the iter->temp buffer
3847 * will be the static_temp_buf and not created from kmalloc.
3848 * If the entry size is greater than the buffer, we cannot
3849 * save it. Just return NULL in that case. This is only
3850 * used to add markers when two consecutive events' time
3851 * stamps have a large delta. See trace_print_lat_context()
3852 */
3853 if (iter->temp == static_temp_buf &&
3854 STATIC_TEMP_BUF_SIZE < ent_size)
3855 return NULL;
3856
3857 /*
3858 * The __find_next_entry() may call peek_next_entry(), which may
3859 * call ring_buffer_peek() that may make the contents of iter->ent
3860 * undefined. Need to copy iter->ent now.
3861 */
3862 if (iter->ent && iter->ent != iter->temp) {
3863 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3864 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3865 void *temp;
3866 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3867 if (!temp)
3868 return NULL;
3869 kfree(iter->temp);
3870 iter->temp = temp;
3871 iter->temp_size = iter->ent_size;
3872 }
3873 memcpy(iter->temp, iter->ent, iter->ent_size);
3874 iter->ent = iter->temp;
3875 }
3876 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3877 /* Put back the original ent_size */
3878 iter->ent_size = ent_size;
3879
3880 return entry;
3881 }
3882
3883 /* Find the next real entry, and increment the iterator to the next entry */
3884 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3885 {
3886 iter->ent = __find_next_entry(iter, &iter->cpu,
3887 &iter->lost_events, &iter->ts);
3888
3889 if (iter->ent)
3890 trace_iterator_increment(iter);
3891
3892 return iter->ent ? iter : NULL;
3893 }
3894
3895 static void trace_consume(struct trace_iterator *iter)
3896 {
3897 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3898 &iter->lost_events);
3899 }
3900
3901 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3902 {
3903 struct trace_iterator *iter = m->private;
3904 int i = (int)*pos;
3905 void *ent;
3906
3907 WARN_ON_ONCE(iter->leftover);
3908
3909 (*pos)++;
3910
3911 /* can't go backwards */
3912 if (iter->idx > i)
3913 return NULL;
3914
3915 if (iter->idx < 0)
3916 ent = trace_find_next_entry_inc(iter);
3917 else
3918 ent = iter;
3919
3920 while (ent && iter->idx < i)
3921 ent = trace_find_next_entry_inc(iter);
3922
3923 iter->pos = *pos;
3924
3925 return ent;
3926 }
3927
3928 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3929 {
3930 struct ring_buffer_iter *buf_iter;
3931 unsigned long entries = 0;
3932 u64 ts;
3933
3934 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3935
3936 buf_iter = trace_buffer_iter(iter, cpu);
3937 if (!buf_iter)
3938 return;
3939
3940 ring_buffer_iter_reset(buf_iter);
3941
3942 /*
3943 * With the max latency tracers we can hit the case where
3944 * a reset never took place on a CPU. This is evidenced
3945 * by the timestamp being before the start of the buffer.
3946 */
3947 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3948 if (ts >= iter->array_buffer->time_start)
3949 break;
3950 entries++;
3951 ring_buffer_iter_advance(buf_iter);
3952 /* This could be a big loop */
3953 cond_resched();
3954 }
3955
3956 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3957 }
3958
3959 /*
3960 * The current tracer is copied to avoid a global locking
3961 * all around.
3962 */
3963 static void *s_start(struct seq_file *m, loff_t *pos)
3964 {
3965 struct trace_iterator *iter = m->private;
3966 struct trace_array *tr = iter->tr;
3967 int cpu_file = iter->cpu_file;
3968 void *p = NULL;
3969 loff_t l = 0;
3970 int cpu;
3971
3972 mutex_lock(&trace_types_lock);
3973 if (unlikely(tr->current_trace != iter->trace)) {
3974 /* Close iter->trace before switching to the new current tracer */
3975 if (iter->trace->close)
3976 iter->trace->close(iter);
3977 iter->trace = tr->current_trace;
3978 /* Reopen the new current tracer */
3979 if (iter->trace->open)
3980 iter->trace->open(iter);
3981 }
3982 mutex_unlock(&trace_types_lock);
3983
3984 #ifdef CONFIG_TRACER_MAX_TRACE
3985 if (iter->snapshot && iter->trace->use_max_tr)
3986 return ERR_PTR(-EBUSY);
3987 #endif
3988
3989 if (*pos != iter->pos) {
3990 iter->ent = NULL;
3991 iter->cpu = 0;
3992 iter->idx = -1;
3993
3994 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3995 for_each_tracing_cpu(cpu)
3996 tracing_iter_reset(iter, cpu);
3997 } else
3998 tracing_iter_reset(iter, cpu_file);
3999
4000 iter->leftover = 0;
4001 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4002 ;
4003
4004 } else {
4005 /*
4006 * If we overflowed the seq_file before, then we want
4007 * to just reuse the trace_seq buffer again.
4008 */
4009 if (iter->leftover)
4010 p = iter;
4011 else {
4012 l = *pos - 1;
4013 p = s_next(m, p, &l);
4014 }
4015 }
4016
4017 trace_event_read_lock();
4018 trace_access_lock(cpu_file);
4019 return p;
4020 }
4021
4022 static void s_stop(struct seq_file *m, void *p)
4023 {
4024 struct trace_iterator *iter = m->private;
4025
4026 #ifdef CONFIG_TRACER_MAX_TRACE
4027 if (iter->snapshot && iter->trace->use_max_tr)
4028 return;
4029 #endif
4030
4031 trace_access_unlock(iter->cpu_file);
4032 trace_event_read_unlock();
4033 }
4034
4035 static void
4036 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4037 unsigned long *entries, int cpu)
4038 {
4039 unsigned long count;
4040
4041 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4042 /*
4043 * If this buffer has skipped entries, then we hold all
4044 * entries for the trace and we need to ignore the
4045 * ones before the time stamp.
4046 */
4047 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4048 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4049 /* total is the same as the entries */
4050 *total = count;
4051 } else
4052 *total = count +
4053 ring_buffer_overrun_cpu(buf->buffer, cpu);
4054 *entries = count;
4055 }
4056
4057 static void
4058 get_total_entries(struct array_buffer *buf,
4059 unsigned long *total, unsigned long *entries)
4060 {
4061 unsigned long t, e;
4062 int cpu;
4063
4064 *total = 0;
4065 *entries = 0;
4066
4067 for_each_tracing_cpu(cpu) {
4068 get_total_entries_cpu(buf, &t, &e, cpu);
4069 *total += t;
4070 *entries += e;
4071 }
4072 }
4073
4074 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4075 {
4076 unsigned long total, entries;
4077
4078 if (!tr)
4079 tr = &global_trace;
4080
4081 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4082
4083 return entries;
4084 }
4085
4086 unsigned long trace_total_entries(struct trace_array *tr)
4087 {
4088 unsigned long total, entries;
4089
4090 if (!tr)
4091 tr = &global_trace;
4092
4093 get_total_entries(&tr->array_buffer, &total, &entries);
4094
4095 return entries;
4096 }
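/*
 * Usage sketch (illustrative): a caller that wants to know how many
 * events are currently readable can do
 *
 *	unsigned long nr_all  = trace_total_entries(NULL);
 *	unsigned long nr_cpu0 = trace_total_entries_cpu(NULL, 0);
 *
 * Passing NULL selects the global trace array, as the checks above show.
 * Overwritten (overrun) events are counted in the "total" figure only,
 * so they are not part of the value returned here.
 */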
4097
4098 static void print_lat_help_header(struct seq_file *m)
4099 {
4100 seq_puts(m, "# _------=> CPU# \n"
4101 "# / _-----=> irqs-off/BH-disabled\n"
4102 "# | / _----=> need-resched \n"
4103 "# || / _---=> hardirq/softirq \n"
4104 "# ||| / _--=> preempt-depth \n"
4105 "# |||| / _-=> migrate-disable \n"
4106 "# ||||| / delay \n"
4107 "# cmd pid |||||| time | caller \n"
4108 "# \\ / |||||| \\ | / \n");
4109 }
4110
4111 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4112 {
4113 unsigned long total;
4114 unsigned long entries;
4115
4116 get_total_entries(buf, &total, &entries);
4117 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4118 entries, total, num_online_cpus());
4119 seq_puts(m, "#\n");
4120 }
4121
4122 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4123 unsigned int flags)
4124 {
4125 bool tgid = flags & TRACE_ITER(RECORD_TGID);
4126
4127 print_event_info(buf, m);
4128
4129 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4130 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4131 }
4132
4133 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4134 unsigned int flags)
4135 {
4136 bool tgid = flags & TRACE_ITER(RECORD_TGID);
4137 static const char space[] = " ";
4138 int prec = tgid ? 12 : 2;
4139
4140 print_event_info(buf, m);
4141
4142 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4143 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4144 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4145 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4146 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4147 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4148 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4149 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4150 }
4151
4152 void
4153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4154 {
4155 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4156 struct array_buffer *buf = iter->array_buffer;
4157 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4158 struct tracer *type = iter->trace;
4159 unsigned long entries;
4160 unsigned long total;
4161 const char *name = type->name;
4162
4163 get_total_entries(buf, &total, &entries);
4164
4165 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4166 name, init_utsname()->release);
4167 seq_puts(m, "# -----------------------------------"
4168 "---------------------------------\n");
4169 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4170 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4171 nsecs_to_usecs(data->saved_latency),
4172 entries,
4173 total,
4174 buf->cpu,
4175 preempt_model_str(),
4176 /* These are reserved for later use */
4177 0, 0, 0, 0);
4178 #ifdef CONFIG_SMP
4179 seq_printf(m, " #P:%d)\n", num_online_cpus());
4180 #else
4181 seq_puts(m, ")\n");
4182 #endif
4183 seq_puts(m, "# -----------------\n");
4184 seq_printf(m, "# | task: %.16s-%d "
4185 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4186 data->comm, data->pid,
4187 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4188 data->policy, data->rt_priority);
4189 seq_puts(m, "# -----------------\n");
4190
4191 if (data->critical_start) {
4192 seq_puts(m, "# => started at: ");
4193 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4194 trace_print_seq(m, &iter->seq);
4195 seq_puts(m, "\n# => ended at: ");
4196 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4197 trace_print_seq(m, &iter->seq);
4198 seq_puts(m, "\n#\n");
4199 }
4200
4201 seq_puts(m, "#\n");
4202 }
4203
4204 static void test_cpu_buff_start(struct trace_iterator *iter)
4205 {
4206 struct trace_seq *s = &iter->seq;
4207 struct trace_array *tr = iter->tr;
4208
4209 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
4210 return;
4211
4212 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4213 return;
4214
4215 if (cpumask_available(iter->started) &&
4216 cpumask_test_cpu(iter->cpu, iter->started))
4217 return;
4218
4219 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4220 return;
4221
4222 if (cpumask_available(iter->started))
4223 cpumask_set_cpu(iter->cpu, iter->started);
4224
4225 /* Don't print started cpu buffer for the first entry of the trace */
4226 if (iter->idx > 1)
4227 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4228 iter->cpu);
4229 }
4230
4231 #ifdef CONFIG_FTRACE_SYSCALLS
4232 static bool is_syscall_event(struct trace_event *event)
4233 {
4234 return (event->funcs == &enter_syscall_print_funcs) ||
4235 (event->funcs == &exit_syscall_print_funcs);
4236
4237 }
4238 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
4239 #else
4240 static inline bool is_syscall_event(struct trace_event *event)
4241 {
4242 return false;
4243 }
4244 #define syscall_buf_size 0
4245 #endif /* CONFIG_FTRACE_SYSCALLS */
4246
4247 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4248 {
4249 struct trace_array *tr = iter->tr;
4250 struct trace_seq *s = &iter->seq;
4251 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4252 struct trace_entry *entry;
4253 struct trace_event *event;
4254
4255 entry = iter->ent;
4256
4257 test_cpu_buff_start(iter);
4258
4259 event = ftrace_find_event(entry->type);
4260
4261 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4262 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4263 trace_print_lat_context(iter);
4264 else
4265 trace_print_context(iter);
4266 }
4267
4268 if (trace_seq_has_overflowed(s))
4269 return TRACE_TYPE_PARTIAL_LINE;
4270
4271 if (event) {
4272 if (tr->trace_flags & TRACE_ITER(FIELDS))
4273 return print_event_fields(iter, event);
4274 /*
4275 * For TRACE_EVENT() events, the print_fmt is not
4276 * safe to use if the array has delta offsets.
4277 * Force printing via the fields.
4278 */
4279 if ((tr->text_delta)) {
4280 /* ftrace and system call events are still OK */
4281 if ((event->type > __TRACE_LAST_TYPE) &&
4282 !is_syscall_event(event))
4283 return print_event_fields(iter, event);
4284 }
4285 return event->funcs->trace(iter, sym_flags, event);
4286 }
4287
4288 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4289
4290 return trace_handle_return(s);
4291 }
4292
4293 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4294 {
4295 struct trace_array *tr = iter->tr;
4296 struct trace_seq *s = &iter->seq;
4297 struct trace_entry *entry;
4298 struct trace_event *event;
4299
4300 entry = iter->ent;
4301
4302 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
4303 trace_seq_printf(s, "%d %d %llu ",
4304 entry->pid, iter->cpu, iter->ts);
4305
4306 if (trace_seq_has_overflowed(s))
4307 return TRACE_TYPE_PARTIAL_LINE;
4308
4309 event = ftrace_find_event(entry->type);
4310 if (event)
4311 return event->funcs->raw(iter, 0, event);
4312
4313 trace_seq_printf(s, "%d ?\n", entry->type);
4314
4315 return trace_handle_return(s);
4316 }
4317
4318 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4319 {
4320 struct trace_array *tr = iter->tr;
4321 struct trace_seq *s = &iter->seq;
4322 unsigned char newline = '\n';
4323 struct trace_entry *entry;
4324 struct trace_event *event;
4325
4326 entry = iter->ent;
4327
4328 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4329 SEQ_PUT_HEX_FIELD(s, entry->pid);
4330 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4331 SEQ_PUT_HEX_FIELD(s, iter->ts);
4332 if (trace_seq_has_overflowed(s))
4333 return TRACE_TYPE_PARTIAL_LINE;
4334 }
4335
4336 event = ftrace_find_event(entry->type);
4337 if (event) {
4338 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4339 if (ret != TRACE_TYPE_HANDLED)
4340 return ret;
4341 }
4342
4343 SEQ_PUT_FIELD(s, newline);
4344
4345 return trace_handle_return(s);
4346 }
4347
4348 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4349 {
4350 struct trace_array *tr = iter->tr;
4351 struct trace_seq *s = &iter->seq;
4352 struct trace_entry *entry;
4353 struct trace_event *event;
4354
4355 entry = iter->ent;
4356
4357 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4358 SEQ_PUT_FIELD(s, entry->pid);
4359 SEQ_PUT_FIELD(s, iter->cpu);
4360 SEQ_PUT_FIELD(s, iter->ts);
4361 if (trace_seq_has_overflowed(s))
4362 return TRACE_TYPE_PARTIAL_LINE;
4363 }
4364
4365 event = ftrace_find_event(entry->type);
4366 return event ? event->funcs->binary(iter, 0, event) :
4367 TRACE_TYPE_HANDLED;
4368 }
4369
4370 int trace_empty(struct trace_iterator *iter)
4371 {
4372 struct ring_buffer_iter *buf_iter;
4373 int cpu;
4374
4375 /* If we are looking at one CPU buffer, only check that one */
4376 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4377 cpu = iter->cpu_file;
4378 buf_iter = trace_buffer_iter(iter, cpu);
4379 if (buf_iter) {
4380 if (!ring_buffer_iter_empty(buf_iter))
4381 return 0;
4382 } else {
4383 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4384 return 0;
4385 }
4386 return 1;
4387 }
4388
4389 for_each_tracing_cpu(cpu) {
4390 buf_iter = trace_buffer_iter(iter, cpu);
4391 if (buf_iter) {
4392 if (!ring_buffer_iter_empty(buf_iter))
4393 return 0;
4394 } else {
4395 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4396 return 0;
4397 }
4398 }
4399
4400 return 1;
4401 }
4402
4403 /* Called with trace_event_read_lock() held. */
4404 enum print_line_t print_trace_line(struct trace_iterator *iter)
4405 {
4406 struct trace_array *tr = iter->tr;
4407 unsigned long trace_flags = tr->trace_flags;
4408 enum print_line_t ret;
4409
4410 if (iter->lost_events) {
4411 if (iter->lost_events == (unsigned long)-1)
4412 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4413 iter->cpu);
4414 else
4415 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4416 iter->cpu, iter->lost_events);
4417 if (trace_seq_has_overflowed(&iter->seq))
4418 return TRACE_TYPE_PARTIAL_LINE;
4419 }
4420
4421 if (iter->trace && iter->trace->print_line) {
4422 ret = iter->trace->print_line(iter);
4423 if (ret != TRACE_TYPE_UNHANDLED)
4424 return ret;
4425 }
4426
4427 if (iter->ent->type == TRACE_BPUTS &&
4428 trace_flags & TRACE_ITER(PRINTK) &&
4429 trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4430 return trace_print_bputs_msg_only(iter);
4431
4432 if (iter->ent->type == TRACE_BPRINT &&
4433 trace_flags & TRACE_ITER(PRINTK) &&
4434 trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4435 return trace_print_bprintk_msg_only(iter);
4436
4437 if (iter->ent->type == TRACE_PRINT &&
4438 trace_flags & TRACE_ITER(PRINTK) &&
4439 trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4440 return trace_print_printk_msg_only(iter);
4441
4442 if (trace_flags & TRACE_ITER(BIN))
4443 return print_bin_fmt(iter);
4444
4445 if (trace_flags & TRACE_ITER(HEX))
4446 return print_hex_fmt(iter);
4447
4448 if (trace_flags & TRACE_ITER(RAW))
4449 return print_raw_fmt(iter);
4450
4451 return print_trace_fmt(iter);
4452 }
4453
4454 void trace_latency_header(struct seq_file *m)
4455 {
4456 struct trace_iterator *iter = m->private;
4457 struct trace_array *tr = iter->tr;
4458
4459 /* print nothing if the buffers are empty */
4460 if (trace_empty(iter))
4461 return;
4462
4463 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4464 print_trace_header(m, iter);
4465
4466 if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
4467 print_lat_help_header(m);
4468 }
4469
4470 void trace_default_header(struct seq_file *m)
4471 {
4472 struct trace_iterator *iter = m->private;
4473 struct trace_array *tr = iter->tr;
4474 unsigned long trace_flags = tr->trace_flags;
4475
4476 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
4477 return;
4478
4479 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4480 /* print nothing if the buffers are empty */
4481 if (trace_empty(iter))
4482 return;
4483 print_trace_header(m, iter);
4484 if (!(trace_flags & TRACE_ITER(VERBOSE)))
4485 print_lat_help_header(m);
4486 } else {
4487 if (!(trace_flags & TRACE_ITER(VERBOSE))) {
4488 if (trace_flags & TRACE_ITER(IRQ_INFO))
4489 print_func_help_header_irq(iter->array_buffer,
4490 m, trace_flags);
4491 else
4492 print_func_help_header(iter->array_buffer, m,
4493 trace_flags);
4494 }
4495 }
4496 }
4497
4498 static void test_ftrace_alive(struct seq_file *m)
4499 {
4500 if (!ftrace_is_dead())
4501 return;
4502 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4503 "# MAY BE MISSING FUNCTION EVENTS\n");
4504 }
4505
4506 #ifdef CONFIG_TRACER_MAX_TRACE
4507 static void show_snapshot_main_help(struct seq_file *m)
4508 {
4509 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4510 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4511 "# Takes a snapshot of the main buffer.\n"
4512 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4513 "# (Doesn't have to be '2' works with any number that\n"
4514 "# is not a '0' or '1')\n");
4515 }
4516
4517 static void show_snapshot_percpu_help(struct seq_file *m)
4518 {
4519 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4520 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4521 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4522 "# Takes a snapshot of the main buffer for this cpu.\n");
4523 #else
4524 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4525 "# Must use main snapshot file to allocate.\n");
4526 #endif
4527 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4528 "# (Doesn't have to be '2' works with any number that\n"
4529 "# is not a '0' or '1')\n");
4530 }
4531
4532 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4533 {
4534 if (iter->tr->allocated_snapshot)
4535 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4536 else
4537 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4538
4539 seq_puts(m, "# Snapshot commands:\n");
4540 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4541 show_snapshot_main_help(m);
4542 else
4543 show_snapshot_percpu_help(m);
4544 }
4545 #else
4546 /* Should never be called */
4547 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4548 #endif
4549
4550 static int s_show(struct seq_file *m, void *v)
4551 {
4552 struct trace_iterator *iter = v;
4553 int ret;
4554
4555 if (iter->ent == NULL) {
4556 if (iter->tr) {
4557 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4558 seq_puts(m, "#\n");
4559 test_ftrace_alive(m);
4560 }
4561 if (iter->snapshot && trace_empty(iter))
4562 print_snapshot_help(m, iter);
4563 else if (iter->trace && iter->trace->print_header)
4564 iter->trace->print_header(m);
4565 else
4566 trace_default_header(m);
4567
4568 } else if (iter->leftover) {
4569 /*
4570 * If we filled the seq_file buffer earlier, we
4571 * want to just show it now.
4572 */
4573 ret = trace_print_seq(m, &iter->seq);
4574
4575 /* ret should this time be zero, but you never know */
4576 iter->leftover = ret;
4577
4578 } else {
4579 ret = print_trace_line(iter);
4580 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4581 iter->seq.full = 0;
4582 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4583 }
4584 ret = trace_print_seq(m, &iter->seq);
4585 /*
4586 * If we overflow the seq_file buffer, then it will
4587 * ask us for this data again at start up.
4588 * Use that instead.
4589 * ret is 0 if seq_file write succeeded.
4590 * -1 otherwise.
4591 */
4592 iter->leftover = ret;
4593 }
4594
4595 return 0;
4596 }
4597
4598 /*
4599 * Should be used after trace_array_get(), trace_types_lock
4600 * ensures that i_cdev was already initialized.
4601 */
4602 static inline int tracing_get_cpu(struct inode *inode)
4603 {
4604 if (inode->i_cdev) /* See trace_create_cpu_file() */
4605 return (long)inode->i_cdev - 1;
4606 return RING_BUFFER_ALL_CPUS;
4607 }
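/*
 * Sketch of the encoding assumed here: the per-CPU file creation code is
 * expected to stash "cpu + 1" in inode->i_cdev, so a NULL i_cdev (the
 * default) decodes to RING_BUFFER_ALL_CPUS and anything else decodes back
 * to the CPU number:
 *
 *	inode->i_cdev = (void *)(cpu + 1);	// at file creation time
 *	cpu = (long)inode->i_cdev - 1;		// here
 */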
4608
4609 static const struct seq_operations tracer_seq_ops = {
4610 .start = s_start,
4611 .next = s_next,
4612 .stop = s_stop,
4613 .show = s_show,
4614 };
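/*
 * Rough picture (illustrative) of how the seq_file core drives these
 * callbacks for one read() of the "trace" file:
 *
 *	p = s_start(m, &pos);		// sync iter->trace, position iterator
 *	while (p && !seq_has_overflowed(m)) {
 *		s_show(m, p);		// emit a header or one trace line
 *		p = s_next(m, p, &pos);	// advance to the next entry
 *	}
 *	s_stop(m, p);			// drop the read side locks
 *
 * s_start() can also return ERR_PTR(-EBUSY) when the snapshot is owned
 * by a max-latency tracer, which the seq_file core hands back to the
 * reader as an error.
 */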
4615
4616 /*
4617 * Note, as iter itself can be allocated and freed in different
4618 * ways, this function is only used to free its content, and not
4619 * the iterator itself. The only requirement for all the allocations
4620 * is that they zero all fields (kzalloc), as freeing works with
4621 * either allocated content or NULL.
4622 */
4623 static void free_trace_iter_content(struct trace_iterator *iter)
4624 {
4625 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4626 if (iter->fmt != static_fmt_buf)
4627 kfree(iter->fmt);
4628
4629 kfree(iter->temp);
4630 kfree(iter->buffer_iter);
4631 mutex_destroy(&iter->mutex);
4632 free_cpumask_var(iter->started);
4633 }
4634
4635 static struct trace_iterator *
4636 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4637 {
4638 struct trace_array *tr = inode->i_private;
4639 struct trace_iterator *iter;
4640 int cpu;
4641
4642 if (tracing_disabled)
4643 return ERR_PTR(-ENODEV);
4644
4645 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4646 if (!iter)
4647 return ERR_PTR(-ENOMEM);
4648
4649 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4650 GFP_KERNEL);
4651 if (!iter->buffer_iter)
4652 goto release;
4653
4654 /*
4655 * trace_find_next_entry() may need to save off iter->ent.
4656 * It will place it into the iter->temp buffer. As most
4657 * events are less than 128 bytes, allocate a buffer of that size.
4658 * If one is greater, then trace_find_next_entry() will
4659 * allocate a new buffer to adjust for the bigger iter->ent.
4660 * It's not critical if it fails to get allocated here.
4661 */
4662 iter->temp = kmalloc(128, GFP_KERNEL);
4663 if (iter->temp)
4664 iter->temp_size = 128;
4665
4666 /*
4667 * trace_event_printf() may need to modify given format
4668 * string to replace %p with %px so that it shows real address
4669 * instead of a hash value. However, that is only needed for event
4670 * tracing; other tracers may not need it. Defer the allocation
4671 * until it is needed.
4672 */
4673 iter->fmt = NULL;
4674 iter->fmt_size = 0;
4675
4676 mutex_lock(&trace_types_lock);
4677 iter->trace = tr->current_trace;
4678
4679 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4680 goto fail;
4681
4682 iter->tr = tr;
4683
4684 #ifdef CONFIG_TRACER_MAX_TRACE
4685 /* Currently only the top directory has a snapshot */
4686 if (tr->current_trace->print_max || snapshot)
4687 iter->array_buffer = &tr->max_buffer;
4688 else
4689 #endif
4690 iter->array_buffer = &tr->array_buffer;
4691 iter->snapshot = snapshot;
4692 iter->pos = -1;
4693 iter->cpu_file = tracing_get_cpu(inode);
4694 mutex_init(&iter->mutex);
4695
4696 /* Notify the tracer early; before we stop tracing. */
4697 if (iter->trace->open)
4698 iter->trace->open(iter);
4699
4700 /* Annotate start of buffers if we had overruns */
4701 if (ring_buffer_overruns(iter->array_buffer->buffer))
4702 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4703
4704 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4705 if (trace_clocks[tr->clock_id].in_ns)
4706 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4707
4708 /*
4709 * If pause-on-trace is enabled, then stop the trace while
4710 * dumping, unless this is the "snapshot" file
4711 */
4712 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
4713 iter->iter_flags |= TRACE_FILE_PAUSE;
4714 tracing_stop_tr(tr);
4715 }
4716
4717 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4718 for_each_tracing_cpu(cpu) {
4719 iter->buffer_iter[cpu] =
4720 ring_buffer_read_start(iter->array_buffer->buffer,
4721 cpu, GFP_KERNEL);
4722 tracing_iter_reset(iter, cpu);
4723 }
4724 } else {
4725 cpu = iter->cpu_file;
4726 iter->buffer_iter[cpu] =
4727 ring_buffer_read_start(iter->array_buffer->buffer,
4728 cpu, GFP_KERNEL);
4729 tracing_iter_reset(iter, cpu);
4730 }
4731
4732 mutex_unlock(&trace_types_lock);
4733
4734 return iter;
4735
4736 fail:
4737 mutex_unlock(&trace_types_lock);
4738 free_trace_iter_content(iter);
4739 release:
4740 seq_release_private(inode, file);
4741 return ERR_PTR(-ENOMEM);
4742 }
4743
4744 int tracing_open_generic(struct inode *inode, struct file *filp)
4745 {
4746 int ret;
4747
4748 ret = tracing_check_open_get_tr(NULL);
4749 if (ret)
4750 return ret;
4751
4752 filp->private_data = inode->i_private;
4753 return 0;
4754 }
4755
4756 bool tracing_is_disabled(void)
4757 {
4758 return tracing_disabled;
4759 }
4760
4761 /*
4762 * Open and update trace_array ref count.
4763 * Must have the current trace_array passed to it.
4764 */
4765 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4766 {
4767 struct trace_array *tr = inode->i_private;
4768 int ret;
4769
4770 ret = tracing_check_open_get_tr(tr);
4771 if (ret)
4772 return ret;
4773
4774 filp->private_data = inode->i_private;
4775
4776 return 0;
4777 }
4778
4779 /*
4780 * The private pointer of the inode is the trace_event_file.
4781 * Update the tr ref count associated to it.
4782 */
4783 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4784 {
4785 struct trace_event_file *file = inode->i_private;
4786 int ret;
4787
4788 ret = tracing_check_open_get_tr(file->tr);
4789 if (ret)
4790 return ret;
4791
4792 guard(mutex)(&event_mutex);
4793
4794 /* Fail if the file is marked for removal */
4795 if (file->flags & EVENT_FILE_FL_FREED) {
4796 trace_array_put(file->tr);
4797 return -ENODEV;
4798 } else {
4799 event_file_get(file);
4800 }
4801
4802 filp->private_data = inode->i_private;
4803
4804 return 0;
4805 }
4806
4807 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4808 {
4809 struct trace_event_file *file = inode->i_private;
4810
4811 trace_array_put(file->tr);
4812 event_file_put(file);
4813
4814 return 0;
4815 }
4816
4817 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4818 {
4819 tracing_release_file_tr(inode, filp);
4820 return single_release(inode, filp);
4821 }
4822
4823 static int tracing_release(struct inode *inode, struct file *file)
4824 {
4825 struct trace_array *tr = inode->i_private;
4826 struct seq_file *m = file->private_data;
4827 struct trace_iterator *iter;
4828 int cpu;
4829
4830 if (!(file->f_mode & FMODE_READ)) {
4831 trace_array_put(tr);
4832 return 0;
4833 }
4834
4835 /* Writes do not use seq_file */
4836 iter = m->private;
4837 mutex_lock(&trace_types_lock);
4838
4839 for_each_tracing_cpu(cpu) {
4840 if (iter->buffer_iter[cpu])
4841 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4842 }
4843
4844 if (iter->trace && iter->trace->close)
4845 iter->trace->close(iter);
4846
4847 if (iter->iter_flags & TRACE_FILE_PAUSE)
4848 /* reenable tracing if it was previously enabled */
4849 tracing_start_tr(tr);
4850
4851 __trace_array_put(tr);
4852
4853 mutex_unlock(&trace_types_lock);
4854
4855 free_trace_iter_content(iter);
4856 seq_release_private(inode, file);
4857
4858 return 0;
4859 }
4860
4861 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4862 {
4863 struct trace_array *tr = inode->i_private;
4864
4865 trace_array_put(tr);
4866 return 0;
4867 }
4868
4869 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4870 {
4871 struct trace_array *tr = inode->i_private;
4872
4873 trace_array_put(tr);
4874
4875 return single_release(inode, file);
4876 }
4877
4878 static int tracing_open(struct inode *inode, struct file *file)
4879 {
4880 struct trace_array *tr = inode->i_private;
4881 struct trace_iterator *iter;
4882 int ret;
4883
4884 ret = tracing_check_open_get_tr(tr);
4885 if (ret)
4886 return ret;
4887
4888 /* If this file was open for write, then erase contents */
4889 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4890 int cpu = tracing_get_cpu(inode);
4891 struct array_buffer *trace_buf = &tr->array_buffer;
4892
4893 #ifdef CONFIG_TRACER_MAX_TRACE
4894 if (tr->current_trace->print_max)
4895 trace_buf = &tr->max_buffer;
4896 #endif
4897
4898 if (cpu == RING_BUFFER_ALL_CPUS)
4899 tracing_reset_online_cpus(trace_buf);
4900 else
4901 tracing_reset_cpu(trace_buf, cpu);
4902 }
4903
4904 if (file->f_mode & FMODE_READ) {
4905 iter = __tracing_open(inode, file, false);
4906 if (IS_ERR(iter))
4907 ret = PTR_ERR(iter);
4908 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4909 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4910 }
4911
4912 if (ret < 0)
4913 trace_array_put(tr);
4914
4915 return ret;
4916 }
4917
4918 /*
4919 * Some tracers are not suitable for instance buffers.
4920 * A tracer is always available for the global array (toplevel)
4921 * or if it explicitly states that it is.
4922 */
4923 static bool
4924 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4925 {
4926 #ifdef CONFIG_TRACER_SNAPSHOT
4927 /* arrays with mapped buffer range do not have snapshots */
4928 if (tr->range_addr_start && t->use_max_tr)
4929 return false;
4930 #endif
4931 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4932 }
4933
4934 /* Find the next tracer that this trace array may use */
4935 static struct tracer *
4936 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4937 {
4938 while (t && !trace_ok_for_array(t, tr))
4939 t = t->next;
4940
4941 return t;
4942 }
4943
4944 static void *
4945 t_next(struct seq_file *m, void *v, loff_t *pos)
4946 {
4947 struct trace_array *tr = m->private;
4948 struct tracer *t = v;
4949
4950 (*pos)++;
4951
4952 if (t)
4953 t = get_tracer_for_array(tr, t->next);
4954
4955 return t;
4956 }
4957
4958 static void *t_start(struct seq_file *m, loff_t *pos)
4959 {
4960 struct trace_array *tr = m->private;
4961 struct tracer *t;
4962 loff_t l = 0;
4963
4964 mutex_lock(&trace_types_lock);
4965
4966 t = get_tracer_for_array(tr, trace_types);
4967 for (; t && l < *pos; t = t_next(m, t, &l))
4968 ;
4969
4970 return t;
4971 }
4972
4973 static void t_stop(struct seq_file *m, void *p)
4974 {
4975 mutex_unlock(&trace_types_lock);
4976 }
4977
4978 static int t_show(struct seq_file *m, void *v)
4979 {
4980 struct tracer *t = v;
4981
4982 if (!t)
4983 return 0;
4984
4985 seq_puts(m, t->name);
4986 if (t->next)
4987 seq_putc(m, ' ');
4988 else
4989 seq_putc(m, '\n');
4990
4991 return 0;
4992 }
4993
4994 static const struct seq_operations show_traces_seq_ops = {
4995 .start = t_start,
4996 .next = t_next,
4997 .stop = t_stop,
4998 .show = t_show,
4999 };
5000
5001 static int show_traces_open(struct inode *inode, struct file *file)
5002 {
5003 struct trace_array *tr = inode->i_private;
5004 struct seq_file *m;
5005 int ret;
5006
5007 ret = tracing_check_open_get_tr(tr);
5008 if (ret)
5009 return ret;
5010
5011 ret = seq_open(file, &show_traces_seq_ops);
5012 if (ret) {
5013 trace_array_put(tr);
5014 return ret;
5015 }
5016
5017 m = file->private_data;
5018 m->private = tr;
5019
5020 return 0;
5021 }
5022
5023 static int tracing_seq_release(struct inode *inode, struct file *file)
5024 {
5025 struct trace_array *tr = inode->i_private;
5026
5027 trace_array_put(tr);
5028 return seq_release(inode, file);
5029 }
5030
5031 static ssize_t
5032 tracing_write_stub(struct file *filp, const char __user *ubuf,
5033 size_t count, loff_t *ppos)
5034 {
5035 return count;
5036 }
5037
5038 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5039 {
5040 int ret;
5041
5042 if (file->f_mode & FMODE_READ)
5043 ret = seq_lseek(file, offset, whence);
5044 else
5045 file->f_pos = ret = 0;
5046
5047 return ret;
5048 }
5049
5050 static const struct file_operations tracing_fops = {
5051 .open = tracing_open,
5052 .read = seq_read,
5053 .read_iter = seq_read_iter,
5054 .splice_read = copy_splice_read,
5055 .write = tracing_write_stub,
5056 .llseek = tracing_lseek,
5057 .release = tracing_release,
5058 };
5059
5060 static const struct file_operations show_traces_fops = {
5061 .open = show_traces_open,
5062 .read = seq_read,
5063 .llseek = seq_lseek,
5064 .release = tracing_seq_release,
5065 };
5066
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069 size_t count, loff_t *ppos)
5070 {
5071 struct trace_array *tr = file_inode(filp)->i_private;
5072 char *mask_str __free(kfree) = NULL;
5073 int len;
5074
5075 len = snprintf(NULL, 0, "%*pb\n",
5076 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077 mask_str = kmalloc(len, GFP_KERNEL);
5078 if (!mask_str)
5079 return -ENOMEM;
5080
5081 len = snprintf(mask_str, len, "%*pb\n",
5082 cpumask_pr_args(tr->tracing_cpumask));
5083 if (len >= count)
5084 return -EINVAL;
5085
5086 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5087 }
5088
5089 int tracing_set_cpumask(struct trace_array *tr,
5090 cpumask_var_t tracing_cpumask_new)
5091 {
5092 int cpu;
5093
5094 if (!tr)
5095 return -EINVAL;
5096
5097 local_irq_disable();
5098 arch_spin_lock(&tr->max_lock);
5099 for_each_tracing_cpu(cpu) {
5100 /*
5101 * Increase/decrease the disabled counter if we are
5102 * about to flip a bit in the cpumask:
5103 */
5104 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5105 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5106 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5107 #ifdef CONFIG_TRACER_MAX_TRACE
5108 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5109 #endif
5110 }
5111 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5112 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5113 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5114 #ifdef CONFIG_TRACER_MAX_TRACE
5115 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5116 #endif
5117 }
5118 }
5119 arch_spin_unlock(&tr->max_lock);
5120 local_irq_enable();
5121
5122 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5123
5124 return 0;
5125 }
5126
5127 static ssize_t
5128 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5129 size_t count, loff_t *ppos)
5130 {
5131 struct trace_array *tr = file_inode(filp)->i_private;
5132 cpumask_var_t tracing_cpumask_new;
5133 int err;
5134
5135 if (count == 0 || count > KMALLOC_MAX_SIZE)
5136 return -EINVAL;
5137
5138 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5139 return -ENOMEM;
5140
5141 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5142 if (err)
5143 goto err_free;
5144
5145 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5146 if (err)
5147 goto err_free;
5148
5149 free_cpumask_var(tracing_cpumask_new);
5150
5151 return count;
5152
5153 err_free:
5154 free_cpumask_var(tracing_cpumask_new);
5155
5156 return err;
5157 }
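/*
 * From user space this file uses the standard cpumask hex format, e.g.
 * (illustrative):
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask	# CPUs 0 and 1 only
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *	3
 *
 * cpumask_parse_user() accepts the same comma-separated hex groups that
 * the "%*pb" read side prints above.
 */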
5158
5159 static const struct file_operations tracing_cpumask_fops = {
5160 .open = tracing_open_generic_tr,
5161 .read = tracing_cpumask_read,
5162 .write = tracing_cpumask_write,
5163 .release = tracing_release_generic_tr,
5164 .llseek = generic_file_llseek,
5165 };
5166
5167 static int tracing_trace_options_show(struct seq_file *m, void *v)
5168 {
5169 struct tracer_opt *trace_opts;
5170 struct trace_array *tr = m->private;
5171 struct tracer_flags *flags;
5172 u32 tracer_flags;
5173 int i;
5174
5175 guard(mutex)(&trace_types_lock);
5176
5177 for (i = 0; trace_options[i]; i++) {
5178 if (tr->trace_flags & (1ULL << i))
5179 seq_printf(m, "%s\n", trace_options[i]);
5180 else
5181 seq_printf(m, "no%s\n", trace_options[i]);
5182 }
5183
5184 flags = tr->current_trace_flags;
5185 if (!flags || !flags->opts)
5186 return 0;
5187
5188 tracer_flags = flags->val;
5189 trace_opts = flags->opts;
5190
5191 for (i = 0; trace_opts[i].name; i++) {
5192 if (tracer_flags & trace_opts[i].bit)
5193 seq_printf(m, "%s\n", trace_opts[i].name);
5194 else
5195 seq_printf(m, "no%s\n", trace_opts[i].name);
5196 }
5197
5198 return 0;
5199 }
5200
5201 static int __set_tracer_option(struct trace_array *tr,
5202 struct tracer_flags *tracer_flags,
5203 struct tracer_opt *opts, int neg)
5204 {
5205 struct tracer *trace = tracer_flags->trace;
5206 int ret = 0;
5207
5208 if (trace->set_flag)
5209 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5210 if (ret)
5211 return ret;
5212
5213 if (neg)
5214 tracer_flags->val &= ~opts->bit;
5215 else
5216 tracer_flags->val |= opts->bit;
5217 return 0;
5218 }
5219
5220 /* Try to assign a tracer specific option */
5221 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5222 {
5223 struct tracer_flags *tracer_flags = tr->current_trace_flags;
5224 struct tracer_opt *opts = NULL;
5225 int i;
5226
5227 if (!tracer_flags || !tracer_flags->opts)
5228 return 0;
5229
5230 for (i = 0; tracer_flags->opts[i].name; i++) {
5231 opts = &tracer_flags->opts[i];
5232
5233 if (strcmp(cmp, opts->name) == 0)
5234 return __set_tracer_option(tr, tracer_flags, opts, neg);
5235 }
5236
5237 return -EINVAL;
5238 }
5239
5240 /* Some tracers require overwrite to stay enabled */
5241 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
5242 {
5243 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
5244 return -1;
5245
5246 return 0;
5247 }
5248
5249 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
5250 {
5251 switch (mask) {
5252 case TRACE_ITER(RECORD_TGID):
5253 case TRACE_ITER(RECORD_CMD):
5254 case TRACE_ITER(TRACE_PRINTK):
5255 case TRACE_ITER(COPY_MARKER):
5256 lockdep_assert_held(&event_mutex);
5257 }
5258
5259 /* do nothing if flag is already set */
5260 if (!!(tr->trace_flags & mask) == !!enabled)
5261 return 0;
5262
5263 /* Give the tracer a chance to approve the change */
5264 if (tr->current_trace->flag_changed)
5265 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5266 return -EINVAL;
5267
5268 switch (mask) {
5269 case TRACE_ITER(TRACE_PRINTK):
5270 if (enabled) {
5271 update_printk_trace(tr);
5272 } else {
5273 /*
5274 * The global_trace cannot clear this.
5275 * Its flag only gets cleared if another instance sets it.
5276 */
5277 if (printk_trace == &global_trace)
5278 return -EINVAL;
5279 /*
5280 * An instance must always have it set;
5281 * by default, that's the global_trace instance.
5282 */
5283 if (printk_trace == tr)
5284 update_printk_trace(&global_trace);
5285 }
5286 break;
5287
5288 case TRACE_ITER(COPY_MARKER):
5289 update_marker_trace(tr, enabled);
5290 /* update_marker_trace updates the tr->trace_flags */
5291 return 0;
5292 }
5293
5294 if (enabled)
5295 tr->trace_flags |= mask;
5296 else
5297 tr->trace_flags &= ~mask;
5298
5299 switch (mask) {
5300 case TRACE_ITER(RECORD_CMD):
5301 trace_event_enable_cmd_record(enabled);
5302 break;
5303
5304 case TRACE_ITER(RECORD_TGID):
5305
5306 if (trace_alloc_tgid_map() < 0) {
5307 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
5308 return -ENOMEM;
5309 }
5310
5311 trace_event_enable_tgid_record(enabled);
5312 break;
5313
5314 case TRACE_ITER(EVENT_FORK):
5315 trace_event_follow_fork(tr, enabled);
5316 break;
5317
5318 case TRACE_ITER(FUNC_FORK):
5319 ftrace_pid_follow_fork(tr, enabled);
5320 break;
5321
5322 case TRACE_ITER(OVERWRITE):
5323 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5324 #ifdef CONFIG_TRACER_MAX_TRACE
5325 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5326 #endif
5327 break;
5328
5329 case TRACE_ITER(PRINTK):
5330 trace_printk_start_stop_comm(enabled);
5331 trace_printk_control(enabled);
5332 break;
5333
5334 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
5335 case TRACE_GRAPH_GRAPH_TIME:
5336 ftrace_graph_graph_time_control(enabled);
5337 break;
5338 #endif
5339 }
5340
5341 return 0;
5342 }
5343
5344 int trace_set_options(struct trace_array *tr, char *option)
5345 {
5346 char *cmp;
5347 int neg = 0;
5348 int ret;
5349 size_t orig_len = strlen(option);
5350 int len;
5351
5352 cmp = strstrip(option);
5353
5354 len = str_has_prefix(cmp, "no");
5355 if (len)
5356 neg = 1;
5357
5358 cmp += len;
5359
5360 mutex_lock(&event_mutex);
5361 mutex_lock(&trace_types_lock);
5362
5363 ret = match_string(trace_options, -1, cmp);
5364 /* If it did not match a core option, try the tracer specific options */
5365 if (ret < 0)
5366 ret = set_tracer_option(tr, cmp, neg);
5367 else
5368 ret = set_tracer_flag(tr, 1ULL << ret, !neg);
5369
5370 mutex_unlock(&trace_types_lock);
5371 mutex_unlock(&event_mutex);
5372
5373 /*
5374 * If the first trailing whitespace is replaced with '\0' by strstrip,
5375 * turn it back into a space.
5376 */
5377 if (orig_len > strlen(option))
5378 option[strlen(option)] = ' ';
5379
5380 return ret;
5381 }
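/*
 * Usage sketch (illustrative): both core trace flags and tracer specific
 * options funnel through this helper, and a "no" prefix clears instead
 * of sets, e.g.
 *
 *	trace_set_options(tr, "sym-addr");	// set a core trace flag
 *	trace_set_options(tr, "nooverwrite");	// clear a core trace flag
 *
 * This mirrors what writing "sym-addr" or "nooverwrite" into the
 * trace_options file does.
 */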
5382
5383 static void __init apply_trace_boot_options(void)
5384 {
5385 char *buf = trace_boot_options_buf;
5386 char *option;
5387
5388 while (true) {
5389 option = strsep(&buf, ",");
5390
5391 if (!option)
5392 break;
5393
5394 if (*option)
5395 trace_set_options(&global_trace, option);
5396
5397 /* Put back the comma to allow this to be called again */
5398 if (buf)
5399 *(buf - 1) = ',';
5400 }
5401 }
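/*
 * Example (assuming the usual "trace_options=" kernel command line
 * parameter is what fills trace_boot_options_buf): booting with
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * makes this loop call trace_set_options() once for "sym-addr" and once
 * for "nooverwrite" against the global trace array.
 */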
5402
5403 static ssize_t
5404 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5405 size_t cnt, loff_t *ppos)
5406 {
5407 struct seq_file *m = filp->private_data;
5408 struct trace_array *tr = m->private;
5409 char buf[64];
5410 int ret;
5411
5412 if (cnt >= sizeof(buf))
5413 return -EINVAL;
5414
5415 if (copy_from_user(buf, ubuf, cnt))
5416 return -EFAULT;
5417
5418 buf[cnt] = 0;
5419
5420 ret = trace_set_options(tr, buf);
5421 if (ret < 0)
5422 return ret;
5423
5424 *ppos += cnt;
5425
5426 return cnt;
5427 }
5428
5429 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5430 {
5431 struct trace_array *tr = inode->i_private;
5432 int ret;
5433
5434 ret = tracing_check_open_get_tr(tr);
5435 if (ret)
5436 return ret;
5437
5438 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5439 if (ret < 0)
5440 trace_array_put(tr);
5441
5442 return ret;
5443 }
5444
5445 static const struct file_operations tracing_iter_fops = {
5446 .open = tracing_trace_options_open,
5447 .read = seq_read,
5448 .llseek = seq_lseek,
5449 .release = tracing_single_release_tr,
5450 .write = tracing_trace_options_write,
5451 };
5452
5453 static const char readme_msg[] =
5454 "tracing mini-HOWTO:\n\n"
5455 "By default tracefs removes all OTH file permission bits.\n"
5456 "When mounting tracefs an optional group id can be specified\n"
5457 "which adds the group to every directory and file in tracefs:\n\n"
5458 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5459 "# echo 0 > tracing_on : quick way to disable tracing\n"
5460 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5461 " Important files:\n"
5462 " trace\t\t\t- The static contents of the buffer\n"
5463 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5464 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5465 " current_tracer\t- function and latency tracers\n"
5466 " available_tracers\t- list of configured tracers for current_tracer\n"
5467 " error_log\t- error log for failed commands (that support it)\n"
5468 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5469 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5470 " trace_clock\t\t- change the clock used to order events\n"
5471 " local: Per cpu clock but may not be synced across CPUs\n"
5472 " global: Synced across CPUs but slows tracing down.\n"
5473 " counter: Not a clock, but just an increment\n"
5474 " uptime: Jiffy counter from time of boot\n"
5475 " perf: Same clock that perf events use\n"
5476 #ifdef CONFIG_X86_64
5477 " x86-tsc: TSC cycle counter\n"
5478 #endif
5479 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5480 " delta: Delta difference against a buffer-wide timestamp\n"
5481 " absolute: Absolute (standalone) timestamp\n"
5482 "\n trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5483 "\n trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5484 " tracing_cpumask\t- Limit which CPUs to trace\n"
5485 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5486 "\t\t\t Remove sub-buffer with rmdir\n"
5487 " trace_options\t\t- Set format or modify how tracing happens\n"
5488 "\t\t\t Disable an option by prefixing 'no' to the\n"
5489 "\t\t\t option name\n"
5490 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5491 #ifdef CONFIG_DYNAMIC_FTRACE
5492 "\n available_filter_functions - list of functions that can be filtered on\n"
5493 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5494 "\t\t\t functions\n"
5495 "\t accepts: func_full_name or glob-matching-pattern\n"
5496 "\t modules: Can select a group via module\n"
5497 "\t Format: :mod:<module-name>\n"
5498 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5499 "\t triggers: a command to perform when function is hit\n"
5500 "\t Format: <function>:<trigger>[:count]\n"
5501 "\t trigger: traceon, traceoff\n"
5502 "\t\t enable_event:<system>:<event>\n"
5503 "\t\t disable_event:<system>:<event>\n"
5504 #ifdef CONFIG_STACKTRACE
5505 "\t\t stacktrace\n"
5506 #endif
5507 #ifdef CONFIG_TRACER_SNAPSHOT
5508 "\t\t snapshot\n"
5509 #endif
5510 "\t\t dump\n"
5511 "\t\t cpudump\n"
5512 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5513 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5514 "\t The first one will disable tracing every time do_fault is hit\n"
5515 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5516 "\t The first time do_trap is hit and it disables tracing, the\n"
5517 "\t counter will decrement to 2. If tracing is already disabled,\n"
5518 "\t the counter will not decrement. It only decrements when the\n"
5519 "\t trigger did work\n"
5520 "\t To remove trigger without count:\n"
5521 "\t echo '!<function>:<trigger>' > set_ftrace_filter\n"
5522 "\t To remove trigger with a count:\n"
5523 "\t echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5524 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5525 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5526 "\t modules: Can select a group via module command :mod:\n"
5527 "\t Does not accept triggers\n"
5528 #endif /* CONFIG_DYNAMIC_FTRACE */
5529 #ifdef CONFIG_FUNCTION_TRACER
5530 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5531 "\t\t (function)\n"
5532 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5533 "\t\t (function)\n"
5534 #endif
5535 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5536 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5537 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5538 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5539 #endif
5540 #ifdef CONFIG_TRACER_SNAPSHOT
5541 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5542 "\t\t\t snapshot buffer. Read the contents for more\n"
5543 "\t\t\t information\n"
5544 #endif
5545 #ifdef CONFIG_STACK_TRACER
5546 " stack_trace\t\t- Shows the max stack trace when active\n"
5547 " stack_max_size\t- Shows current max stack size that was traced\n"
5548 "\t\t\t Write into this file to reset the max size (trigger a\n"
5549 "\t\t\t new trace)\n"
5550 #ifdef CONFIG_DYNAMIC_FTRACE
5551 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5552 "\t\t\t traces\n"
5553 #endif
5554 #endif /* CONFIG_STACK_TRACER */
5555 #ifdef CONFIG_DYNAMIC_EVENTS
5556 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5557 "\t\t\t Write into this file to define/undefine new trace events.\n"
5558 #endif
5559 #ifdef CONFIG_KPROBE_EVENTS
5560 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5561 "\t\t\t Write into this file to define/undefine new trace events.\n"
5562 #endif
5563 #ifdef CONFIG_UPROBE_EVENTS
5564 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5565 "\t\t\t Write into this file to define/undefine new trace events.\n"
5566 #endif
5567 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5568 defined(CONFIG_FPROBE_EVENTS)
5569 "\t accepts: event-definitions (one definition per line)\n"
5570 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5572 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5573 #endif
5574 #ifdef CONFIG_FPROBE_EVENTS
5575 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5576 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5577 #endif
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 "\t s:[synthetic/]<event> <field> [<field>]\n"
5580 #endif
5581 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5582 "\t -:[<group>/][<event>]\n"
5583 #ifdef CONFIG_KPROBE_EVENTS
5584 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5585 "\t place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5586 #endif
5587 #ifdef CONFIG_UPROBE_EVENTS
5588 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5589 #endif
5590 "\t args: <name>=fetcharg[:type]\n"
5591 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5592 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5593 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5594 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5595 "\t <argname>[->field[->field|.field...]],\n"
5596 #endif
5597 #else
5598 "\t $stack<index>, $stack, $retval, $comm,\n"
5599 #endif
5600 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5601 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5602 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5603 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5604 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5605 #ifdef CONFIG_HIST_TRIGGERS
5606 "\t field: <stype> <name>;\n"
5607 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5608 "\t [unsigned] char/int/long\n"
5609 #endif
5610 "\t efield: For event probes ('e' types), the field is one of the fields\n"
5611 "\t of the <attached-group>/<attached-event>.\n"
5612 #endif
5613 " set_event\t\t- Enables events by name written into it\n"
5614 "\t\t\t Can enable module events via: :mod:<module>\n"
5615 " events/\t\t- Directory containing all trace event subsystems:\n"
5616 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5617 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5618 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5619 "\t\t\t events\n"
5620 " filter\t\t- If set, only events passing filter are traced\n"
5621 " events/<system>/<event>/\t- Directory containing control files for\n"
5622 "\t\t\t <event>:\n"
5623 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5624 " filter\t\t- If set, only events passing filter are traced\n"
5625 " trigger\t\t- If set, a command to perform when event is hit\n"
5626 "\t Format: <trigger>[:count][if <filter>]\n"
5627 "\t trigger: traceon, traceoff\n"
5628 "\t enable_event:<system>:<event>\n"
5629 "\t disable_event:<system>:<event>\n"
5630 #ifdef CONFIG_HIST_TRIGGERS
5631 "\t enable_hist:<system>:<event>\n"
5632 "\t disable_hist:<system>:<event>\n"
5633 #endif
5634 #ifdef CONFIG_STACKTRACE
5635 "\t\t stacktrace\n"
5636 #endif
5637 #ifdef CONFIG_TRACER_SNAPSHOT
5638 "\t\t snapshot\n"
5639 #endif
5640 #ifdef CONFIG_HIST_TRIGGERS
5641 "\t\t hist (see below)\n"
5642 #endif
5643 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5644 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5645 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5646 "\t events/block/block_unplug/trigger\n"
5647 "\t The first disables tracing every time block_unplug is hit.\n"
5648 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5649 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5650 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5651 "\t Like function triggers, the counter is only decremented if it\n"
5652 "\t enabled or disabled tracing.\n"
5653 "\t To remove a trigger without a count:\n"
5654 "\t echo '!<trigger>' > <system>/<event>/trigger\n"
5655 "\t To remove a trigger with a count:\n"
5656 "\t echo '!<trigger>:0' > <system>/<event>/trigger\n"
5657 "\t Filters can be ignored when removing a trigger.\n"
5658 #ifdef CONFIG_HIST_TRIGGERS
5659 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5660 "\t Format: hist:keys=<field1[,field2,...]>\n"
5661 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5662 "\t [:values=<field1[,field2,...]>]\n"
5663 "\t [:sort=<field1[,field2,...]>]\n"
5664 "\t [:size=#entries]\n"
5665 "\t [:pause][:continue][:clear]\n"
5666 "\t [:name=histname1]\n"
5667 "\t [:nohitcount]\n"
5668 "\t [:<handler>.<action>]\n"
5669 "\t [if <filter>]\n\n"
5670 "\t Note, special fields can be used as well:\n"
5671 "\t common_timestamp - to record current timestamp\n"
5672 "\t common_cpu - to record the CPU the event happened on\n"
5673 "\n"
5674 "\t A hist trigger variable can be:\n"
5675 "\t - a reference to a field e.g. x=current_timestamp,\n"
5676 "\t - a reference to another variable e.g. y=$x,\n"
5677 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5678 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5679 "\n"
5680 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5681 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5682 "\t variable reference, field or numeric literal.\n"
5683 "\n"
5684 "\t When a matching event is hit, an entry is added to a hash\n"
5685 "\t table using the key(s) and value(s) named, and the value of a\n"
5686 "\t sum called 'hitcount' is incremented. Keys and values\n"
5687 "\t correspond to fields in the event's format description. Keys\n"
5688 "\t can be any field, or the special string 'common_stacktrace'.\n"
5689 "\t Compound keys consisting of up to two fields can be specified\n"
5690 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5691 "\t fields. Sort keys consisting of up to two fields can be\n"
5692 "\t specified using the 'sort' keyword. The sort direction can\n"
5693 "\t be modified by appending '.descending' or '.ascending' to a\n"
5694 "\t sort field. The 'size' parameter can be used to specify more\n"
5695 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5696 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5697 "\t its histogram data will be shared with other triggers of the\n"
5698 "\t same name, and trigger hits will update this common data.\n\n"
5699 "\t Reading the 'hist' file for the event will dump the hash\n"
5700 "\t table in its entirety to stdout. If there are multiple hist\n"
5701 "\t triggers attached to an event, there will be a table for each\n"
5702 "\t trigger in the output. The table displayed for a named\n"
5703 "\t trigger will be the same as any other instance having the\n"
5704 "\t same name. The default format used to display a given field\n"
5705 "\t can be modified by appending any of the following modifiers\n"
5706 "\t to the field name, as applicable:\n\n"
5707 "\t .hex display a number as a hex value\n"
5708 "\t .sym display an address as a symbol\n"
5709 "\t .sym-offset display an address as a symbol and offset\n"
5710 "\t .execname display a common_pid as a program name\n"
5711 "\t .syscall display a syscall id as a syscall name\n"
5712 "\t .log2 display log2 value rather than raw number\n"
5713 "\t .buckets=size display values in groups of size rather than raw number\n"
5714 "\t .usecs display a common_timestamp in microseconds\n"
5715 "\t .percent display a number as a percentage value\n"
5716 "\t .graph display a bar-graph of a value\n\n"
5717 "\t The 'pause' parameter can be used to pause an existing hist\n"
5718 "\t trigger or to start a hist trigger but not log any events\n"
5719 "\t until told to do so. 'continue' can be used to start or\n"
5720 "\t restart a paused hist trigger.\n\n"
5721 "\t The 'clear' parameter will clear the contents of a running\n"
5722 "\t hist trigger and leave its current paused/active state\n"
5723 "\t unchanged.\n\n"
5724 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5725 "\t raw hitcount in the histogram.\n\n"
5726 "\t The enable_hist and disable_hist triggers can be used to\n"
5727 "\t have one event conditionally start and stop another event's\n"
5728 "\t already-attached hist trigger. The syntax is analogous to\n"
5729 "\t the enable_event and disable_event triggers.\n\n"
5730 "\t Hist trigger handlers and actions are executed whenever a\n"
5731 "\t histogram entry is added or updated. They take the form:\n\n"
5732 "\t <handler>.<action>\n\n"
5733 "\t The available handlers are:\n\n"
5734 "\t onmatch(matching.event) - invoke on addition or update\n"
5735 "\t onmax(var) - invoke if var exceeds current max\n"
5736 "\t onchange(var) - invoke action if var changes\n\n"
5737 "\t The available actions are:\n\n"
5738 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5739 "\t save(field,...) - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741 "\t snapshot() - snapshot the trace buffer\n\n"
5742 #endif
5743 #ifdef CONFIG_SYNTH_EVENTS
5744 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745 "\t Write into this file to define/undefine new synthetic events.\n"
5746 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5747 #endif
5748 #endif
5749 ;
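/*
 * A short, illustrative session using the interface documented above
 * (paths assume tracefs is mounted at /sys/kernel/tracing; the gid,
 * function and event names are only examples taken from the text above
 * and depend on the kernel configuration):
 *
 *   # mount -t tracefs -o gid=1000 nodev /sys/kernel/tracing
 *   # cd /sys/kernel/tracing
 *   # echo 1 > tracing_on
 *   # echo do_fault:traceoff > set_ftrace_filter
 *   # echo traceoff:3 > events/block/block_unplug/trigger
 *   # cat trace
 */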
5750
5751 static ssize_t
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753 size_t cnt, loff_t *ppos)
5754 {
5755 return simple_read_from_buffer(ubuf, cnt, ppos,
5756 readme_msg, strlen(readme_msg));
5757 }
5758
5759 static const struct file_operations tracing_readme_fops = {
5760 .open = tracing_open_generic,
5761 .read = tracing_readme_read,
5762 .llseek = generic_file_llseek,
5763 };
5764
5765 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
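/*
 * The eval_map file walks trace_eval_maps, a chain of arrays where each
 * array starts with a head item (module and length), is followed by the
 * real map entries, and ends with a tail item pointing to the next
 * array. update_eval_map() steps over a tail/head pair so the seq_file
 * callbacks below only ever see real map entries.
 */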
5766 static union trace_eval_map_item *
5767 update_eval_map(union trace_eval_map_item *ptr)
5768 {
5769 if (!ptr->map.eval_string) {
5770 if (ptr->tail.next) {
5771 ptr = ptr->tail.next;
5772 /* Set ptr to the next real item (skip head) */
5773 ptr++;
5774 } else
5775 return NULL;
5776 }
5777 return ptr;
5778 }
5779
5780 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5781 {
5782 union trace_eval_map_item *ptr = v;
5783
5784 /*
5785 * Paranoid! If ptr points to end, we don't want to increment past it.
5786 * This really should never happen.
5787 */
5788 (*pos)++;
5789 ptr = update_eval_map(ptr);
5790 if (WARN_ON_ONCE(!ptr))
5791 return NULL;
5792
5793 ptr++;
5794 ptr = update_eval_map(ptr);
5795
5796 return ptr;
5797 }
5798
5799 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5800 {
5801 union trace_eval_map_item *v;
5802 loff_t l = 0;
5803
5804 mutex_lock(&trace_eval_mutex);
5805
5806 v = trace_eval_maps;
5807 if (v)
5808 v++;
5809
5810 while (v && l < *pos) {
5811 v = eval_map_next(m, v, &l);
5812 }
5813
5814 return v;
5815 }
5816
5817 static void eval_map_stop(struct seq_file *m, void *v)
5818 {
5819 mutex_unlock(&trace_eval_mutex);
5820 }
5821
5822 static int eval_map_show(struct seq_file *m, void *v)
5823 {
5824 union trace_eval_map_item *ptr = v;
5825
5826 seq_printf(m, "%s %ld (%s)\n",
5827 ptr->map.eval_string, ptr->map.eval_value,
5828 ptr->map.system);
5829
5830 return 0;
5831 }
5832
5833 static const struct seq_operations tracing_eval_map_seq_ops = {
5834 .start = eval_map_start,
5835 .next = eval_map_next,
5836 .stop = eval_map_stop,
5837 .show = eval_map_show,
5838 };
5839
5840 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5841 {
5842 int ret;
5843
5844 ret = tracing_check_open_get_tr(NULL);
5845 if (ret)
5846 return ret;
5847
5848 return seq_open(filp, &tracing_eval_map_seq_ops);
5849 }
5850
5851 static const struct file_operations tracing_eval_map_fops = {
5852 .open = tracing_eval_map_open,
5853 .read = seq_read,
5854 .llseek = seq_lseek,
5855 .release = seq_release,
5856 };
5857
5858 static inline union trace_eval_map_item *
5859 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5860 {
5861 /* Return tail of array given the head */
5862 return ptr + ptr->head.length + 1;
5863 }
5864
5865 static void
5866 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5867 int len)
5868 {
5869 struct trace_eval_map **stop;
5870 struct trace_eval_map **map;
5871 union trace_eval_map_item *map_array;
5872 union trace_eval_map_item *ptr;
5873
5874 stop = start + len;
5875
5876 /*
5877 * The trace_eval_maps contains the map plus a head and tail item,
5878 * where the head holds the module and length of array, and the
5879 * tail holds a pointer to the next list.
5880 */
5881 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5882 if (!map_array) {
5883 pr_warn("Unable to allocate trace eval mapping\n");
5884 return;
5885 }
5886
5887 guard(mutex)(&trace_eval_mutex);
5888
5889 if (!trace_eval_maps)
5890 trace_eval_maps = map_array;
5891 else {
5892 ptr = trace_eval_maps;
5893 for (;;) {
5894 ptr = trace_eval_jmp_to_tail(ptr);
5895 if (!ptr->tail.next)
5896 break;
5897 ptr = ptr->tail.next;
5898
5899 }
5900 ptr->tail.next = map_array;
5901 }
5902 map_array->head.mod = mod;
5903 map_array->head.length = len;
5904 map_array++;
5905
5906 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5907 map_array->map = **map;
5908 map_array++;
5909 }
5910 memset(map_array, 0, sizeof(*map_array));
5911 }
5912
5913 static void trace_create_eval_file(struct dentry *d_tracer)
5914 {
5915 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5916 NULL, &tracing_eval_map_fops);
5917 }
5918
5919 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5920 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5921 static inline void trace_insert_eval_map_file(struct module *mod,
5922 struct trace_eval_map **start, int len) { }
5923 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5924
5925 static void
5926 trace_event_update_with_eval_map(struct module *mod,
5927 struct trace_eval_map **start,
5928 int len)
5929 {
5930 struct trace_eval_map **map;
5931
5932 /* With no eval maps, only run the sanitizer if the btf_type_tag attr exists. */
5933 if (len <= 0) {
5934 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5935 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5936 __has_attribute(btf_type_tag)))
5937 return;
5938 }
5939
5940 map = start;
5941
5942 trace_event_update_all(map, len);
5943
5944 if (len <= 0)
5945 return;
5946
5947 trace_insert_eval_map_file(mod, start, len);
5948 }
5949
5950 static ssize_t
5951 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5952 size_t cnt, loff_t *ppos)
5953 {
5954 struct trace_array *tr = filp->private_data;
5955 char buf[MAX_TRACER_SIZE+2];
5956 int r;
5957
5958 scoped_guard(mutex, &trace_types_lock) {
5959 r = sprintf(buf, "%s\n", tr->current_trace->name);
5960 }
5961
5962 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5963 }
5964
5965 int tracer_init(struct tracer *t, struct trace_array *tr)
5966 {
5967 tracing_reset_online_cpus(&tr->array_buffer);
5968 return t->init(tr);
5969 }
5970
5971 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5972 {
5973 int cpu;
5974
5975 for_each_tracing_cpu(cpu)
5976 per_cpu_ptr(buf->data, cpu)->entries = val;
5977 }
5978
5979 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5980 {
5981 if (cpu == RING_BUFFER_ALL_CPUS) {
5982 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5983 } else {
5984 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5985 }
5986 }
5987
5988 #ifdef CONFIG_TRACER_MAX_TRACE
5989 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5990 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5991 struct array_buffer *size_buf, int cpu_id)
5992 {
5993 int cpu, ret = 0;
5994
5995 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5996 for_each_tracing_cpu(cpu) {
5997 ret = ring_buffer_resize(trace_buf->buffer,
5998 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5999 if (ret < 0)
6000 break;
6001 per_cpu_ptr(trace_buf->data, cpu)->entries =
6002 per_cpu_ptr(size_buf->data, cpu)->entries;
6003 }
6004 } else {
6005 ret = ring_buffer_resize(trace_buf->buffer,
6006 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6007 if (ret == 0)
6008 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6009 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6010 }
6011
6012 return ret;
6013 }
6014 #endif /* CONFIG_TRACER_MAX_TRACE */
6015
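/*
 * Resize the main ring buffer of @tr (and, if a snapshot buffer is
 * allocated, the max buffer as well) for @cpu, or for all CPUs when
 * @cpu is RING_BUFFER_ALL_CPUS. Tracing is stopped while the resize is
 * in progress and the per-CPU entry counts are updated on success.
 */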
6016 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6017 unsigned long size, int cpu)
6018 {
6019 int ret;
6020
6021 /*
6022 * If kernel or user changes the size of the ring buffer
6023 * we use the size that was given, and we can forget about
6024 * expanding it later.
6025 */
6026 trace_set_ring_buffer_expanded(tr);
6027
6028 /* May be called before buffers are initialized */
6029 if (!tr->array_buffer.buffer)
6030 return 0;
6031
6032 /* Do not allow tracing while resizing ring buffer */
6033 tracing_stop_tr(tr);
6034
6035 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6036 if (ret < 0)
6037 goto out_start;
6038
6039 #ifdef CONFIG_TRACER_MAX_TRACE
6040 if (!tr->allocated_snapshot)
6041 goto out;
6042
6043 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6044 if (ret < 0) {
6045 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6046 &tr->array_buffer, cpu);
6047 if (r < 0) {
6048 /*
6049 * AARGH! We are left with different
6050 * size max buffer!!!!
6051 * The max buffer is our "snapshot" buffer.
6052 * When a tracer needs a snapshot (one of the
6053 * latency tracers), it swaps the max buffer
6054 * with the saved snapshot. We succeeded in
6055 * updating the size of the main buffer, but failed to
6056 * update the size of the max buffer. But when we tried
6057 * to reset the main buffer to the original size, we
6058 * failed there too. This is very unlikely to
6059 * happen, but if it does, warn and kill all
6060 * tracing.
6061 */
6062 WARN_ON(1);
6063 tracing_disabled = 1;
6064 }
6065 goto out_start;
6066 }
6067
6068 update_buffer_entries(&tr->max_buffer, cpu);
6069
6070 out:
6071 #endif /* CONFIG_TRACER_MAX_TRACE */
6072
6073 update_buffer_entries(&tr->array_buffer, cpu);
6074 out_start:
6075 tracing_start_tr(tr);
6076 return ret;
6077 }
6078
6079 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6080 unsigned long size, int cpu_id)
6081 {
6082 guard(mutex)(&trace_types_lock);
6083
6084 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6085 /* make sure this CPU is enabled in the mask */
6086 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6087 return -EINVAL;
6088 }
6089
6090 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6091 }
6092
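/*
 * Layout of the persistent ring buffer's scratch area. It records the
 * clock id, the kernel text address and the text base of every module
 * that was loaded, so addresses found in a buffer carried over from the
 * previous boot can be translated back to the current boot (see
 * trace_adjust_address() below).
 */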
6093 struct trace_mod_entry {
6094 unsigned long mod_addr;
6095 char mod_name[MODULE_NAME_LEN];
6096 };
6097
6098 struct trace_scratch {
6099 unsigned int clock_id;
6100 unsigned long text_addr;
6101 unsigned long nr_entries;
6102 struct trace_mod_entry entries[];
6103 };
6104
6105 static DEFINE_MUTEX(scratch_mutex);
6106
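/*
 * Comparator for __inline_bsearch() in trace_adjust_address(): the key
 * is an address, the pivot a pair of neighbouring (sorted) module
 * entries. A match means the address falls inside the range covered by
 * the first of the two entries.
 */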
6107 static int cmp_mod_entry(const void *key, const void *pivot)
6108 {
6109 unsigned long addr = (unsigned long)key;
6110 const struct trace_mod_entry *ent = pivot;
6111
6112 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6113 return 0;
6114 else
6115 return addr - ent->mod_addr;
6116 }
6117
6118 /**
6119 * trace_adjust_address() - Adjust prev boot address to current address.
6120 * @tr: Persistent ring buffer's trace_array.
6121 * @addr: Address in @tr which is adjusted.
6122 */
6123 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6124 {
6125 struct trace_module_delta *module_delta;
6126 struct trace_scratch *tscratch;
6127 struct trace_mod_entry *entry;
6128 unsigned long raddr;
6129 int idx = 0, nr_entries;
6130
6131 /* If we don't have last boot delta, return the address */
6132 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6133 return addr;
6134
6135 /* tr->module_delta must be protected by rcu. */
6136 guard(rcu)();
6137 tscratch = tr->scratch;
6138 /* if there is no tscratch, module_delta must be NULL. */
6139 module_delta = READ_ONCE(tr->module_delta);
6140 if (!module_delta || !tscratch->nr_entries ||
6141 tscratch->entries[0].mod_addr > addr) {
6142 raddr = addr + tr->text_delta;
6143 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6144 is_kernel_rodata(raddr) ? raddr : addr;
6145 }
6146
6147 /* Note that entries must be sorted. */
6148 nr_entries = tscratch->nr_entries;
6149 if (nr_entries == 1 ||
6150 tscratch->entries[nr_entries - 1].mod_addr < addr)
6151 idx = nr_entries - 1;
6152 else {
6153 entry = __inline_bsearch((void *)addr,
6154 tscratch->entries,
6155 nr_entries - 1,
6156 sizeof(tscratch->entries[0]),
6157 cmp_mod_entry);
6158 if (entry)
6159 idx = entry - tscratch->entries;
6160 }
6161
6162 return addr + module_delta->delta[idx];
6163 }
6164
6165 #ifdef CONFIG_MODULES
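/*
 * module_for_each_mod() callback: record one module's text base address
 * and name in the scratch area of the persistent buffer. Returns -1 if
 * there is no scratch area or no room left for another entry.
 */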
6166 static int save_mod(struct module *mod, void *data)
6167 {
6168 struct trace_array *tr = data;
6169 struct trace_scratch *tscratch;
6170 struct trace_mod_entry *entry;
6171 unsigned int size;
6172
6173 tscratch = tr->scratch;
6174 if (!tscratch)
6175 return -1;
6176 size = tr->scratch_size;
6177
6178 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6179 return -1;
6180
6181 entry = &tscratch->entries[tscratch->nr_entries];
6182
6183 tscratch->nr_entries++;
6184
6185 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6186 strscpy(entry->mod_name, mod->name);
6187
6188 return 0;
6189 }
6190 #else
6191 static int save_mod(struct module *mod, void *data)
6192 {
6193 return 0;
6194 }
6195 #endif
6196
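/*
 * Switch a buffer holding "last boot" data over to recording the current
 * boot: clear the LAST_BOOT flag, rebuild the module list in the scratch
 * area from the currently loaded modules, wipe all CPU buffers so old
 * and new events cannot be mixed, drop the stale module delta table and
 * record the current kernel text address.
 */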
6197 static void update_last_data(struct trace_array *tr)
6198 {
6199 struct trace_module_delta *module_delta;
6200 struct trace_scratch *tscratch;
6201
6202 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6203 return;
6204
6205 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6206 return;
6207
6208 /* Only clear and update the buffer if it has previous boot data. */
6209 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6210
6211 /* Reset the module list and reload them */
6212 if (tr->scratch) {
6213 struct trace_scratch *tscratch = tr->scratch;
6214
6215 tscratch->clock_id = tr->clock_id;
6216 memset(tscratch->entries, 0,
6217 flex_array_size(tscratch, entries, tscratch->nr_entries));
6218 tscratch->nr_entries = 0;
6219
6220 guard(mutex)(&scratch_mutex);
6221 module_for_each_mod(save_mod, tr);
6222 }
6223
6224 /*
6225 * Need to clear all CPU buffers as there cannot be events
6226 * from the previous boot mixed with events with this boot
6227 * as that will cause a confusing trace. Need to clear all
6228 * CPU buffers, even for those that may currently be offline.
6229 */
6230 tracing_reset_all_cpus(&tr->array_buffer);
6231
6232 /* Using current data now */
6233 tr->text_delta = 0;
6234
6235 if (!tr->scratch)
6236 return;
6237
6238 tscratch = tr->scratch;
6239 module_delta = READ_ONCE(tr->module_delta);
6240 WRITE_ONCE(tr->module_delta, NULL);
6241 kfree_rcu(module_delta, rcu);
6242
6243 /* Set the persistent ring buffer meta data to this address */
6244 tscratch->text_addr = (unsigned long)_text;
6245 }
6246
6247 /**
6248 * tracing_update_buffers - used by tracing facility to expand ring buffers
6249 * @tr: The tracing instance
6250 *
6251 * To save memory when tracing is never used on a system that has it
6252 * configured in, the ring buffers are set to a minimum size. Once
6253 * a user starts to use the tracing facility, they need to grow
6254 * to their default size.
6255 *
6256 * This function is to be called when a tracer is about to be used.
6257 */
6258 int tracing_update_buffers(struct trace_array *tr)
6259 {
6260 int ret = 0;
6261
6262 guard(mutex)(&trace_types_lock);
6263
6264 update_last_data(tr);
6265
6266 if (!tr->ring_buffer_expanded)
6267 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6268 RING_BUFFER_ALL_CPUS);
6269 return ret;
6270 }
6271
6272 /*
6273 * Used to clear out the tracer before deletion of an instance.
6274 * Must have trace_types_lock held.
6275 */
6276 static void tracing_set_nop(struct trace_array *tr)
6277 {
6278 if (tr->current_trace == &nop_trace)
6279 return;
6280
6281 tr->current_trace->enabled--;
6282
6283 if (tr->current_trace->reset)
6284 tr->current_trace->reset(tr);
6285
6286 tr->current_trace = &nop_trace;
6287 tr->current_trace_flags = nop_trace.flags;
6288 }
6289
6290 static bool tracer_options_updated;
6291
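/*
 * Switch the current tracer of @tr to the tracer registered under the
 * name @buf. The ring buffer is expanded to its default size first if
 * that has not happened yet, and snapshot-based (use_max_tr) tracers
 * are armed or disarmed as needed. Fails with -EBUSY while trace_pipe
 * readers are active, and with -EINVAL for unknown or disallowed
 * tracers.
 */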
6292 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6293 {
6294 struct tracer *trace = NULL;
6295 struct tracers *t;
6296 #ifdef CONFIG_TRACER_MAX_TRACE
6297 bool had_max_tr;
6298 #endif
6299 int ret;
6300
6301 guard(mutex)(&trace_types_lock);
6302
6303 update_last_data(tr);
6304
6305 if (!tr->ring_buffer_expanded) {
6306 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6307 RING_BUFFER_ALL_CPUS);
6308 if (ret < 0)
6309 return ret;
6310 ret = 0;
6311 }
6312
6313 list_for_each_entry(t, &tr->tracers, list) {
6314 if (strcmp(t->tracer->name, buf) == 0) {
6315 trace = t->tracer;
6316 break;
6317 }
6318 }
6319 if (!trace)
6320 return -EINVAL;
6321
6322 if (trace == tr->current_trace)
6323 return 0;
6324
6325 #ifdef CONFIG_TRACER_SNAPSHOT
6326 if (trace->use_max_tr) {
6327 local_irq_disable();
6328 arch_spin_lock(&tr->max_lock);
6329 ret = tr->cond_snapshot ? -EBUSY : 0;
6330 arch_spin_unlock(&tr->max_lock);
6331 local_irq_enable();
6332 if (ret)
6333 return ret;
6334 }
6335 #endif
6336 /* Some tracers won't work on kernel command line */
6337 if (system_state < SYSTEM_RUNNING && trace->noboot) {
6338 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6339 trace->name);
6340 return -EINVAL;
6341 }
6342
6343 /* Some tracers are only allowed for the top level buffer */
6344 if (!trace_ok_for_array(trace, tr))
6345 return -EINVAL;
6346
6347 /* If trace pipe files are being read, we can't change the tracer */
6348 if (tr->trace_ref)
6349 return -EBUSY;
6350
6351 trace_branch_disable();
6352
6353 tr->current_trace->enabled--;
6354
6355 if (tr->current_trace->reset)
6356 tr->current_trace->reset(tr);
6357
6358 #ifdef CONFIG_TRACER_MAX_TRACE
6359 had_max_tr = tr->current_trace->use_max_tr;
6360
6361 /* Current trace needs to be nop_trace before synchronize_rcu */
6362 tr->current_trace = &nop_trace;
6363 tr->current_trace_flags = nop_trace.flags;
6364
6365 if (had_max_tr && !trace->use_max_tr) {
6366 /*
6367 * We need to make sure that the update_max_tr sees that
6368 * current_trace changed to nop_trace to keep it from
6369 * swapping the buffers after we resize it.
6370 * The update_max_tr is called with interrupts disabled
6371 * so a synchronize_rcu() is sufficient.
6372 */
6373 synchronize_rcu();
6374 free_snapshot(tr);
6375 tracing_disarm_snapshot(tr);
6376 }
6377
6378 if (!had_max_tr && trace->use_max_tr) {
6379 ret = tracing_arm_snapshot_locked(tr);
6380 if (ret)
6381 return ret;
6382 }
6383 #else
6384 tr->current_trace = &nop_trace;
6385 #endif
6386
6387 tr->current_trace_flags = t->flags ? : t->tracer->flags;
6388
6389 if (trace->init) {
6390 ret = tracer_init(trace, tr);
6391 if (ret) {
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393 if (trace->use_max_tr)
6394 tracing_disarm_snapshot(tr);
6395 #endif
6396 tr->current_trace_flags = nop_trace.flags;
6397 return ret;
6398 }
6399 }
6400
6401 tr->current_trace = trace;
6402 tr->current_trace->enabled++;
6403 trace_branch_enable(tr);
6404
6405 return 0;
6406 }
6407
6408 static ssize_t
6409 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6410 size_t cnt, loff_t *ppos)
6411 {
6412 struct trace_array *tr = filp->private_data;
6413 char buf[MAX_TRACER_SIZE+1];
6414 char *name;
6415 size_t ret;
6416 int err;
6417
6418 ret = cnt;
6419
6420 if (cnt > MAX_TRACER_SIZE)
6421 cnt = MAX_TRACER_SIZE;
6422
6423 if (copy_from_user(buf, ubuf, cnt))
6424 return -EFAULT;
6425
6426 buf[cnt] = 0;
6427
6428 name = strim(buf);
6429
6430 err = tracing_set_tracer(tr, name);
6431 if (err)
6432 return err;
6433
6434 *ppos += ret;
6435
6436 return ret;
6437 }
6438
6439 static ssize_t
6440 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6441 size_t cnt, loff_t *ppos)
6442 {
6443 char buf[64];
6444 int r;
6445
6446 r = snprintf(buf, sizeof(buf), "%ld\n",
6447 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6448 if (r > sizeof(buf))
6449 r = sizeof(buf);
6450 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6451 }
6452
6453 static ssize_t
6454 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6455 size_t cnt, loff_t *ppos)
6456 {
6457 unsigned long val;
6458 int ret;
6459
6460 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6461 if (ret)
6462 return ret;
6463
6464 *ptr = val * 1000;
6465
6466 return cnt;
6467 }
6468
6469 static ssize_t
6470 tracing_thresh_read(struct file *filp, char __user *ubuf,
6471 size_t cnt, loff_t *ppos)
6472 {
6473 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6474 }
6475
6476 static ssize_t
6477 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6478 size_t cnt, loff_t *ppos)
6479 {
6480 struct trace_array *tr = filp->private_data;
6481 int ret;
6482
6483 guard(mutex)(&trace_types_lock);
6484 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6485 if (ret < 0)
6486 return ret;
6487
6488 if (tr->current_trace->update_thresh) {
6489 ret = tr->current_trace->update_thresh(tr);
6490 if (ret < 0)
6491 return ret;
6492 }
6493
6494 return cnt;
6495 }
6496
6497 #ifdef CONFIG_TRACER_MAX_TRACE
6498
6499 static ssize_t
6500 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6501 size_t cnt, loff_t *ppos)
6502 {
6503 struct trace_array *tr = filp->private_data;
6504
6505 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6506 }
6507
6508 static ssize_t
6509 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6510 size_t cnt, loff_t *ppos)
6511 {
6512 struct trace_array *tr = filp->private_data;
6513
6514 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6515 }
6516
6517 #endif
6518
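/*
 * trace_pipe is a consuming reader, so only one reader may be attached
 * to a given CPU (or to all CPUs) at a time. The pipe_cpumask tracks
 * which CPUs currently have an open pipe: open_pipe_on_cpu() claims a
 * CPU (or all of them) and close_pipe_on_cpu() releases it again.
 */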
6519 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6520 {
6521 if (cpu == RING_BUFFER_ALL_CPUS) {
6522 if (cpumask_empty(tr->pipe_cpumask)) {
6523 cpumask_setall(tr->pipe_cpumask);
6524 return 0;
6525 }
6526 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6527 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6528 return 0;
6529 }
6530 return -EBUSY;
6531 }
6532
6533 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6534 {
6535 if (cpu == RING_BUFFER_ALL_CPUS) {
6536 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6537 cpumask_clear(tr->pipe_cpumask);
6538 } else {
6539 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6540 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6541 }
6542 }
6543
6544 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6545 {
6546 struct trace_array *tr = inode->i_private;
6547 struct trace_iterator *iter;
6548 int cpu;
6549 int ret;
6550
6551 ret = tracing_check_open_get_tr(tr);
6552 if (ret)
6553 return ret;
6554
6555 guard(mutex)(&trace_types_lock);
6556 cpu = tracing_get_cpu(inode);
6557 ret = open_pipe_on_cpu(tr, cpu);
6558 if (ret)
6559 goto fail_pipe_on_cpu;
6560
6561 /* create a buffer to store the information to pass to userspace */
6562 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6563 if (!iter) {
6564 ret = -ENOMEM;
6565 goto fail_alloc_iter;
6566 }
6567
6568 trace_seq_init(&iter->seq);
6569 iter->trace = tr->current_trace;
6570
6571 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6572 ret = -ENOMEM;
6573 goto fail;
6574 }
6575
6576 /* trace pipe does not show start of buffer */
6577 cpumask_setall(iter->started);
6578
6579 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
6580 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6581
6582 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6583 if (trace_clocks[tr->clock_id].in_ns)
6584 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6585
6586 iter->tr = tr;
6587 iter->array_buffer = &tr->array_buffer;
6588 iter->cpu_file = cpu;
6589 mutex_init(&iter->mutex);
6590 filp->private_data = iter;
6591
6592 if (iter->trace->pipe_open)
6593 iter->trace->pipe_open(iter);
6594
6595 nonseekable_open(inode, filp);
6596
6597 tr->trace_ref++;
6598
6599 return ret;
6600
6601 fail:
6602 kfree(iter);
6603 fail_alloc_iter:
6604 close_pipe_on_cpu(tr, cpu);
6605 fail_pipe_on_cpu:
6606 __trace_array_put(tr);
6607 return ret;
6608 }
6609
6610 static int tracing_release_pipe(struct inode *inode, struct file *file)
6611 {
6612 struct trace_iterator *iter = file->private_data;
6613 struct trace_array *tr = inode->i_private;
6614
6615 scoped_guard(mutex, &trace_types_lock) {
6616 tr->trace_ref--;
6617
6618 if (iter->trace->pipe_close)
6619 iter->trace->pipe_close(iter);
6620 close_pipe_on_cpu(tr, iter->cpu_file);
6621 }
6622
6623 free_trace_iter_content(iter);
6624 kfree(iter);
6625
6626 trace_array_put(tr);
6627
6628 return 0;
6629 }
6630
6631 static __poll_t
6632 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6633 {
6634 struct trace_array *tr = iter->tr;
6635
6636 /* Iterators are static, they should be filled or empty */
6637 if (trace_buffer_iter(iter, iter->cpu_file))
6638 return EPOLLIN | EPOLLRDNORM;
6639
6640 if (tr->trace_flags & TRACE_ITER(BLOCK))
6641 /*
6642 * Always select as readable when in blocking mode
6643 */
6644 return EPOLLIN | EPOLLRDNORM;
6645 else
6646 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6647 filp, poll_table, iter->tr->buffer_percent);
6648 }
6649
6650 static __poll_t
6651 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6652 {
6653 struct trace_iterator *iter = filp->private_data;
6654
6655 return trace_poll(iter, filp, poll_table);
6656 }
6657
6658 /* Must be called with iter->mutex held. */
6659 static int tracing_wait_pipe(struct file *filp)
6660 {
6661 struct trace_iterator *iter = filp->private_data;
6662 int ret;
6663
6664 while (trace_empty(iter)) {
6665
6666 if ((filp->f_flags & O_NONBLOCK)) {
6667 return -EAGAIN;
6668 }
6669
6670 /*
6671 * We block until we read something and tracing is disabled.
6672 * We still block if tracing is disabled, but we have never
6673 * read anything. This allows a user to cat this file, and
6674 * then enable tracing. But after we have read something,
6675 * we give an EOF when tracing is again disabled.
6676 *
6677 * iter->pos will be 0 if we haven't read anything.
6678 */
6679 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6680 break;
6681
6682 mutex_unlock(&iter->mutex);
6683
6684 ret = wait_on_pipe(iter, 0);
6685
6686 mutex_lock(&iter->mutex);
6687
6688 if (ret)
6689 return ret;
6690 }
6691
6692 return 1;
6693 }
6694
6695 static bool update_last_data_if_empty(struct trace_array *tr)
6696 {
6697 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6698 return false;
6699
6700 if (!ring_buffer_empty(tr->array_buffer.buffer))
6701 return false;
6702
6703 /*
6704 * If the buffer contains the last boot data and all per-cpu
6705 * buffers are empty, reset it from the kernel side.
6706 */
6707 update_last_data(tr);
6708 return true;
6709 }
6710
6711 /*
6712 * Consumer reader.
6713 */
6714 static ssize_t
6715 tracing_read_pipe(struct file *filp, char __user *ubuf,
6716 size_t cnt, loff_t *ppos)
6717 {
6718 struct trace_iterator *iter = filp->private_data;
6719 ssize_t sret;
6720
6721 /*
6722 * Avoid more than one consumer on a single file descriptor.
6723 * This is just a matter of trace coherency; the ring buffer itself
6724 * is protected.
6725 */
6726 guard(mutex)(&iter->mutex);
6727
6728 /* return any leftover data */
6729 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6730 if (sret != -EBUSY)
6731 return sret;
6732
6733 trace_seq_init(&iter->seq);
6734
6735 if (iter->trace->read) {
6736 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6737 if (sret)
6738 return sret;
6739 }
6740
6741 waitagain:
6742 if (update_last_data_if_empty(iter->tr))
6743 return 0;
6744
6745 sret = tracing_wait_pipe(filp);
6746 if (sret <= 0)
6747 return sret;
6748
6749 /* stop when tracing is finished */
6750 if (trace_empty(iter))
6751 return 0;
6752
6753 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6754 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6755
6756 /* reset all but tr, trace, and overruns */
6757 trace_iterator_reset(iter);
6758 cpumask_clear(iter->started);
6759 trace_seq_init(&iter->seq);
6760
6761 trace_event_read_lock();
6762 trace_access_lock(iter->cpu_file);
6763 while (trace_find_next_entry_inc(iter) != NULL) {
6764 enum print_line_t ret;
6765 int save_len = iter->seq.seq.len;
6766
6767 ret = print_trace_line(iter);
6768 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6769 /*
6770 * If one print_trace_line() fills the entire trace_seq in one shot,
6771 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6772 * In this case, we need to consume it; otherwise, the loop will peek
6773 * at this event again next time, resulting in an infinite loop.
6774 */
6775 if (save_len == 0) {
6776 iter->seq.full = 0;
6777 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6778 trace_consume(iter);
6779 break;
6780 }
6781
6782 /* In other cases, don't print partial lines */
6783 iter->seq.seq.len = save_len;
6784 break;
6785 }
6786 if (ret != TRACE_TYPE_NO_CONSUME)
6787 trace_consume(iter);
6788
6789 if (trace_seq_used(&iter->seq) >= cnt)
6790 break;
6791
6792 /*
6793 * Setting the full flag means we reached the trace_seq buffer
6794 * size and we should leave by partial output condition above.
6795 * One of the trace_seq_* functions is not used properly.
6796 */
6797 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6798 iter->ent->type);
6799 }
6800 trace_access_unlock(iter->cpu_file);
6801 trace_event_read_unlock();
6802
6803 /* Now copy what we have to the user */
6804 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6805 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6806 trace_seq_init(&iter->seq);
6807
6808 /*
6809 * If there was nothing to send to user, in spite of consuming trace
6810 * entries, go back to wait for more entries.
6811 */
6812 if (sret == -EBUSY)
6813 goto waitagain;
6814
6815 return sret;
6816 }
6817
6818 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6819 unsigned int idx)
6820 {
6821 __free_page(spd->pages[idx]);
6822 }
6823
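/*
 * Fill iter->seq with as many formatted trace lines as fit into the
 * remaining splice budget @rem (bounded by the page-sized seq buffer).
 * Printed events are consumed from the ring buffer, and the budget
 * still left for further pages is returned.
 */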
6824 static size_t
6825 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6826 {
6827 size_t count;
6828 int save_len;
6829 int ret;
6830
6831 /* Seq buffer is page-sized, exactly what we need. */
6832 for (;;) {
6833 save_len = iter->seq.seq.len;
6834 ret = print_trace_line(iter);
6835
6836 if (trace_seq_has_overflowed(&iter->seq)) {
6837 iter->seq.seq.len = save_len;
6838 break;
6839 }
6840
6841 /*
6842 * This should not be hit, because it should only
6843 * be set if the iter->seq overflowed. But check it
6844 * anyway to be safe.
6845 */
6846 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6847 iter->seq.seq.len = save_len;
6848 break;
6849 }
6850
6851 count = trace_seq_used(&iter->seq) - save_len;
6852 if (rem < count) {
6853 rem = 0;
6854 iter->seq.seq.len = save_len;
6855 break;
6856 }
6857
6858 if (ret != TRACE_TYPE_NO_CONSUME)
6859 trace_consume(iter);
6860 rem -= count;
6861 if (!trace_find_next_entry_inc(iter)) {
6862 rem = 0;
6863 iter->ent = NULL;
6864 break;
6865 }
6866 }
6867
6868 return rem;
6869 }
6870
6871 static ssize_t tracing_splice_read_pipe(struct file *filp,
6872 loff_t *ppos,
6873 struct pipe_inode_info *pipe,
6874 size_t len,
6875 unsigned int flags)
6876 {
6877 struct page *pages_def[PIPE_DEF_BUFFERS];
6878 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6879 struct trace_iterator *iter = filp->private_data;
6880 struct splice_pipe_desc spd = {
6881 .pages = pages_def,
6882 .partial = partial_def,
6883 .nr_pages = 0, /* This gets updated below. */
6884 .nr_pages_max = PIPE_DEF_BUFFERS,
6885 .ops = &default_pipe_buf_ops,
6886 .spd_release = tracing_spd_release_pipe,
6887 };
6888 ssize_t ret;
6889 size_t rem;
6890 unsigned int i;
6891
6892 if (splice_grow_spd(pipe, &spd))
6893 return -ENOMEM;
6894
6895 mutex_lock(&iter->mutex);
6896
6897 if (iter->trace->splice_read) {
6898 ret = iter->trace->splice_read(iter, filp,
6899 ppos, pipe, len, flags);
6900 if (ret)
6901 goto out_err;
6902 }
6903
6904 ret = tracing_wait_pipe(filp);
6905 if (ret <= 0)
6906 goto out_err;
6907
6908 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6909 ret = -EFAULT;
6910 goto out_err;
6911 }
6912
6913 trace_event_read_lock();
6914 trace_access_lock(iter->cpu_file);
6915
6916 /* Fill as many pages as possible. */
6917 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6918 spd.pages[i] = alloc_page(GFP_KERNEL);
6919 if (!spd.pages[i])
6920 break;
6921
6922 rem = tracing_fill_pipe_page(rem, iter);
6923
6924 /* Copy the data into the page, so we can start over. */
6925 ret = trace_seq_to_buffer(&iter->seq,
6926 page_address(spd.pages[i]),
6927 min((size_t)trace_seq_used(&iter->seq),
6928 (size_t)PAGE_SIZE));
6929 if (ret < 0) {
6930 __free_page(spd.pages[i]);
6931 break;
6932 }
6933 spd.partial[i].offset = 0;
6934 spd.partial[i].len = ret;
6935
6936 trace_seq_init(&iter->seq);
6937 }
6938
6939 trace_access_unlock(iter->cpu_file);
6940 trace_event_read_unlock();
6941 mutex_unlock(&iter->mutex);
6942
6943 spd.nr_pages = i;
6944
6945 if (i)
6946 ret = splice_to_pipe(pipe, &spd);
6947 else
6948 ret = 0;
6949 out:
6950 splice_shrink_spd(&spd);
6951 return ret;
6952
6953 out_err:
6954 mutex_unlock(&iter->mutex);
6955 goto out;
6956 }
6957
6958 static ssize_t
6959 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6960 size_t cnt, loff_t *ppos)
6961 {
6962 struct inode *inode = file_inode(filp);
6963 struct trace_array *tr = inode->i_private;
6964 char buf[64];
6965 int r;
6966
6967 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6968
6969 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6970 }
6971
6972 static ssize_t
6973 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6974 size_t cnt, loff_t *ppos)
6975 {
6976 struct inode *inode = file_inode(filp);
6977 struct trace_array *tr = inode->i_private;
6978 unsigned long val;
6979 int ret;
6980
6981 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6982 if (ret)
6983 return ret;
6984
6985 if (val > SYSCALL_FAULT_USER_MAX)
6986 val = SYSCALL_FAULT_USER_MAX;
6987
6988 tr->syscall_buf_sz = val;
6989
6990 *ppos += cnt;
6991
6992 return cnt;
6993 }
6994
6995 static ssize_t
6996 tracing_entries_read(struct file *filp, char __user *ubuf,
6997 size_t cnt, loff_t *ppos)
6998 {
6999 struct inode *inode = file_inode(filp);
7000 struct trace_array *tr = inode->i_private;
7001 int cpu = tracing_get_cpu(inode);
7002 char buf[64];
7003 int r = 0;
7004 ssize_t ret;
7005
7006 mutex_lock(&trace_types_lock);
7007
7008 if (cpu == RING_BUFFER_ALL_CPUS) {
7009 int cpu, buf_size_same;
7010 unsigned long size;
7011
7012 size = 0;
7013 buf_size_same = 1;
7014 /* check if all cpu sizes are same */
7015 for_each_tracing_cpu(cpu) {
7016 /* fill in the size from first enabled cpu */
7017 if (size == 0)
7018 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7019 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7020 buf_size_same = 0;
7021 break;
7022 }
7023 }
7024
7025 if (buf_size_same) {
7026 if (!tr->ring_buffer_expanded)
7027 r = sprintf(buf, "%lu (expanded: %lu)\n",
7028 size >> 10,
7029 trace_buf_size >> 10);
7030 else
7031 r = sprintf(buf, "%lu\n", size >> 10);
7032 } else
7033 r = sprintf(buf, "X\n");
7034 } else
7035 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7036
7037 mutex_unlock(&trace_types_lock);
7038
7039 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7040 return ret;
7041 }
7042
7043 static ssize_t
7044 tracing_entries_write(struct file *filp, const char __user *ubuf,
7045 size_t cnt, loff_t *ppos)
7046 {
7047 struct inode *inode = file_inode(filp);
7048 struct trace_array *tr = inode->i_private;
7049 unsigned long val;
7050 int ret;
7051
7052 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7053 if (ret)
7054 return ret;
7055
7056 /* must have at least 1 entry */
7057 if (!val)
7058 return -EINVAL;
7059
7060 /* value is in KB */
7061 val <<= 10;
7062 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7063 if (ret < 0)
7064 return ret;
7065
7066 *ppos += cnt;
7067
7068 return cnt;
7069 }
7070
7071 static ssize_t
7072 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7073 size_t cnt, loff_t *ppos)
7074 {
7075 struct trace_array *tr = filp->private_data;
7076 char buf[64];
7077 int r, cpu;
7078 unsigned long size = 0, expanded_size = 0;
7079
7080 mutex_lock(&trace_types_lock);
7081 for_each_tracing_cpu(cpu) {
7082 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7083 if (!tr->ring_buffer_expanded)
7084 expanded_size += trace_buf_size >> 10;
7085 }
7086 if (tr->ring_buffer_expanded)
7087 r = sprintf(buf, "%lu\n", size);
7088 else
7089 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7090 mutex_unlock(&trace_types_lock);
7091
7092 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7093 }
7094
7095 #define LAST_BOOT_HEADER ((void *)1)
7096
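/*
 * seq_file backing tracing_last_boot_open(): record 0 is a header line
 * showing the previous boot's kernel text address (or "# Current" once
 * the buffer has been reset), followed by one line per module recorded
 * in the persistent scratch area.
 */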
7097 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7098 {
7099 struct trace_array *tr = m->private;
7100 struct trace_scratch *tscratch = tr->scratch;
7101 unsigned int index = *pos;
7102
7103 (*pos)++;
7104
7105 if (*pos == 1)
7106 return LAST_BOOT_HEADER;
7107
7108 /* Only show offsets of the last boot data */
7109 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7110 return NULL;
7111
7112 /* *pos 0 is for the header, 1 is for the first module */
7113 index--;
7114
7115 if (index >= tscratch->nr_entries)
7116 return NULL;
7117
7118 return &tscratch->entries[index];
7119 }
7120
7121 static void *l_start(struct seq_file *m, loff_t *pos)
7122 {
7123 mutex_lock(&scratch_mutex);
7124
7125 return l_next(m, NULL, pos);
7126 }
7127
7128 static void l_stop(struct seq_file *m, void *p)
7129 {
7130 mutex_unlock(&scratch_mutex);
7131 }
7132
7133 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7134 {
7135 struct trace_scratch *tscratch = tr->scratch;
7136
7137 /*
7138 * Do not leak KASLR address. This only shows the KASLR address of
7139 * the last boot. When the ring buffer is started, the LAST_BOOT
7140 * flag gets cleared, and this should only report "current".
7141 * Otherwise it shows the KASLR address from the previous boot which
7142 * should not be the same as the current boot.
7143 */
7144 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7145 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7146 else
7147 seq_puts(m, "# Current\n");
7148 }
7149
7150 static int l_show(struct seq_file *m, void *v)
7151 {
7152 struct trace_array *tr = m->private;
7153 struct trace_mod_entry *entry = v;
7154
7155 if (v == LAST_BOOT_HEADER) {
7156 show_last_boot_header(m, tr);
7157 return 0;
7158 }
7159
7160 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7161 return 0;
7162 }
7163
7164 static const struct seq_operations last_boot_seq_ops = {
7165 .start = l_start,
7166 .next = l_next,
7167 .stop = l_stop,
7168 .show = l_show,
7169 };
7170
7171 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7172 {
7173 struct trace_array *tr = inode->i_private;
7174 struct seq_file *m;
7175 int ret;
7176
7177 ret = tracing_check_open_get_tr(tr);
7178 if (ret)
7179 return ret;
7180
7181 ret = seq_open(file, &last_boot_seq_ops);
7182 if (ret) {
7183 trace_array_put(tr);
7184 return ret;
7185 }
7186
7187 m = file->private_data;
7188 m->private = tr;
7189
7190 return 0;
7191 }
7192
7193 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7194 {
7195 struct trace_array *tr = inode->i_private;
7196 int cpu = tracing_get_cpu(inode);
7197 int ret;
7198
7199 ret = tracing_check_open_get_tr(tr);
7200 if (ret)
7201 return ret;
7202
7203 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7204 if (ret < 0)
7205 __trace_array_put(tr);
7206 return ret;
7207 }
7208
7209 static ssize_t
7210 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7211 size_t cnt, loff_t *ppos)
7212 {
7213 /*
7214 * There is no need to read what the user has written; this function
7215 * is just to make sure that there is no error when "echo" is used
7216 */
7217
7218 *ppos += cnt;
7219
7220 return cnt;
7221 }
7222
7223 static int
7224 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7225 {
7226 struct trace_array *tr = inode->i_private;
7227
7228 /* disable tracing ? */
7229 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
7230 tracer_tracing_off(tr);
7231 /* resize the ring buffer to 0 */
7232 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7233
7234 trace_array_put(tr);
7235
7236 return 0;
7237 }
7238
7239 #define TRACE_MARKER_MAX_SIZE 4096
7240
7241 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7242 size_t cnt, unsigned long ip)
7243 {
7244 struct ring_buffer_event *event;
7245 enum event_trigger_type tt = ETT_NONE;
7246 struct trace_buffer *buffer;
7247 struct print_entry *entry;
7248 int meta_size;
7249 ssize_t written;
7250 size_t size;
7251
7252 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
7253 again:
7254 size = cnt + meta_size;
7255
7256 buffer = tr->array_buffer.buffer;
7257 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7258 tracing_gen_ctx());
7259 if (unlikely(!event)) {
7260 /*
7261 * If the size was greater than what was allowed, then
7262 * make it smaller and try again.
7263 */
7264 if (size > ring_buffer_max_event_size(buffer)) {
7265 cnt = ring_buffer_max_event_size(buffer) - meta_size;
7266 /* The above should only happen once */
7267 if (WARN_ON_ONCE(cnt + meta_size == size))
7268 return -EBADF;
7269 goto again;
7270 }
7271
7272 /* Ring buffer disabled, return as if not open for write */
7273 return -EBADF;
7274 }
7275
7276 entry = ring_buffer_event_data(event);
7277 entry->ip = ip;
7278 memcpy(&entry->buf, buf, cnt);
7279 written = cnt;
7280
7281 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7282 /* do not add \n before testing triggers, but add \0 */
7283 entry->buf[cnt] = '\0';
7284 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7285 }
7286
7287 if (entry->buf[cnt - 1] != '\n') {
7288 entry->buf[cnt] = '\n';
7289 entry->buf[cnt + 1] = '\0';
7290 } else
7291 entry->buf[cnt] = '\0';
7292
7293 if (static_branch_unlikely(&trace_marker_exports_enabled))
7294 ftrace_exports(event, TRACE_EXPORT_MARKER);
7295 __buffer_unlock_commit(buffer, event);
7296
7297 if (tt)
7298 event_triggers_post_call(tr->trace_marker_file, tt);
7299
7300 return written;
7301 }
7302
7303 struct trace_user_buf {
7304 char *buf;
7305 };
7306
7307 static DEFINE_MUTEX(trace_user_buffer_mutex);
7308 static struct trace_user_buf_info *trace_user_buffer;
7309
7310 /**
7311 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
7312 * @tinfo: The descriptor to free up
7313 *
7314 * Frees any data allocated in the trace info descriptor.
7315 */
7316 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
7317 {
7318 char *buf;
7319 int cpu;
7320
7321 if (!tinfo || !tinfo->tbuf)
7322 return;
7323
7324 for_each_possible_cpu(cpu) {
7325 buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7326 kfree(buf);
7327 }
7328 free_percpu(tinfo->tbuf);
7329 }
7330
7331 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
7332 {
7333 char *buf;
7334 int cpu;
7335
7336 lockdep_assert_held(&trace_user_buffer_mutex);
7337
7338 tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7339 if (!tinfo->tbuf)
7340 return -ENOMEM;
7341
7342 tinfo->ref = 1;
7343 tinfo->size = size;
7344
7345 /* Clear each buffer in case of error */
7346 for_each_possible_cpu(cpu) {
7347 per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7348 }
7349
7350 for_each_possible_cpu(cpu) {
7351 buf = kmalloc_node(size, GFP_KERNEL,
7352 cpu_to_node(cpu));
7353 if (!buf)
7354 return -ENOMEM;
7355 per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7356 }
7357
7358 return 0;
7359 }
7360
7361 /* For internal use. Free and reinitialize */
7362 static void user_buffer_free(struct trace_user_buf_info **tinfo)
7363 {
7364 lockdep_assert_held(&trace_user_buffer_mutex);
7365
7366 trace_user_fault_destroy(*tinfo);
7367 kfree(*tinfo);
7368 *tinfo = NULL;
7369 }
7370
7371 /* For internal use. Initialize and allocate */
7372 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
7373 {
7374 bool alloc = false;
7375 int ret;
7376
7377 lockdep_assert_held(&trace_user_buffer_mutex);
7378
7379 if (!*tinfo) {
7380 alloc = true;
7381 *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
7382 if (!*tinfo)
7383 return -ENOMEM;
7384 }
7385
7386 ret = user_fault_buffer_enable(*tinfo, size);
7387 if (ret < 0 && alloc)
7388 user_buffer_free(tinfo);
7389
7390 return ret;
7391 }
7392
7393 /* For internal use. Drop the reference and free if necessary */
7394 static void user_buffer_put(struct trace_user_buf_info **tinfo)
7395 {
7396 guard(mutex)(&trace_user_buffer_mutex);
7397
7398 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
7399 return;
7400
7401 if (--(*tinfo)->ref)
7402 return;
7403
7404 user_buffer_free(tinfo);
7405 }
7406
7407 /**
7408 * trace_user_fault_init - Allocate or reference a per CPU buffer
7409 * @tinfo: A pointer to the trace buffer descriptor
7410 * @size: The size to allocate each per CPU buffer
7411 *
7412 * Create a per CPU buffer that can be used to copy from user space
7413 * in a task context. trace_user_fault_read() must be called with
7414 * preemption disabled; it re-enables preemption internally while copying
7415 * the user space data into the buffer. If a schedule switch occurs
7416 * during the copy, it retries until the copy completes without one,
7417 * so the buffer contents are known to be valid.
7418 *
7419 * Returns 0 on success, negative on failure.
7420 */
7421 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
7422 {
7423 int ret;
7424
7425 if (!tinfo)
7426 return -EINVAL;
7427
7428 guard(mutex)(&trace_user_buffer_mutex);
7429
7430 ret = user_buffer_init(&tinfo, size);
7431 if (ret < 0)
7432 trace_user_fault_destroy(tinfo);
7433
7434 return ret;
7435 }
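
/*
 * A minimal lifecycle sketch for the trace_user_fault_*() helpers (not
 * taken from this file; the caller names, the buffer size and the error
 * handling are assumptions):
 *
 *	static struct trace_user_buf_info my_tinfo;
 *
 *	static int my_setup(void)
 *	{
 *		return trace_user_fault_init(&my_tinfo, PAGE_SIZE);
 *	}
 *
 *	static void my_teardown(void)
 *	{
 *		trace_user_fault_destroy(&my_tinfo);
 *	}
 */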
7436
7437 /**
7438 * trace_user_fault_get - up the ref count for the user buffer
7439 * @tinfo: A pointer to the trace buffer descriptor
7440 *
7441 * Ups the ref count of the trace buffer.
7442 *
7443 * Returns the new ref count.
7444 */
7445 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
7446 {
7447 if (!tinfo)
7448 return -1;
7449
7450 guard(mutex)(&trace_user_buffer_mutex);
7451
7452 tinfo->ref++;
7453 return tinfo->ref;
7454 }
7455
7456 /**
7457 * trace_user_fault_put - drop a reference to a per CPU trace buffer
7458 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
7459 *
7460 * Decrement the ref count of @tinfo.
7461 *
7462 * Returns the new refcount (negative on error).
7463 */
7464 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
7465 {
7466 guard(mutex)(&trace_user_buffer_mutex);
7467
7468 if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
7469 return -1;
7470
7471 --tinfo->ref;
7472 return tinfo->ref;
7473 }
7474
7475 /**
7476 * trace_user_fault_read - Read user space into a per CPU buffer
7477 * @tinfo: The @tinfo initialized by trace_user_fault_init()
7478 * @ptr: The user space pointer to read
7479 * @size: The size of user space to read.
7480 * @copy_func: Optional function to use to copy from user space
7481 * @data: Data to pass to copy_func if it was supplied
7482 *
7483 * Preemption must be disabled when this is called, and must not
7484 * be enabled while using the returned buffer.
7485 * This does the copying from user space into a per CPU buffer.
7486 *
7487 * The @size must not be greater than the size passed in to
7488 * trace_user_fault_init().
7489 *
7490 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
7491 * otherwise it will call @copy_func. It will call @copy_func with:
7492 *
7493 * buffer: the per CPU buffer of the @tinfo.
7494 * ptr: The pointer @ptr to user space to read
7495 * size: The @size of the ptr to read
7496 * data: The @data parameter
7497 *
7498 * It is expected that @copy_func will return 0 on success and non zero
7499 * if there was a fault.
7500 *
7501 * Returns a pointer to the buffer with the content read from @ptr.
7502 * Preemption must remain disabled while the caller accesses the
7503 * buffer returned by this function.
7504 * Returns NULL if there was a fault, or the size passed in is
7505 * greater than the size passed to trace_user_fault_init().
7506 */
7507 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7508 const char __user *ptr, size_t size,
7509 trace_user_buf_copy copy_func, void *data)
7510 {
7511 int cpu = smp_processor_id();
7512 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7513 unsigned int cnt;
7514 int trys = 0;
7515 int ret;
7516
7517 lockdep_assert_preemption_disabled();
7518
7519 /*
7520 * It's up to the caller to not try to copy more than it said
7521 * it would.
7522 */
7523 if (size > tinfo->size)
7524 return NULL;
7525
7526 /*
7527 * This acts similarly to a seqcount. The per CPU context switch count is
7528 * recorded, migration is disabled and preemption is enabled. The
7529 * user space memory is then copied into the per CPU buffer.
7530 * Preemption is disabled again, and if the per CPU context switch count
7531 * is still the same, it means the buffer has not been corrupted.
7532 * If the count is different, it is assumed the buffer is corrupted
7533 * and reading must be tried again.
7534 */
7535
7536 do {
7537 /*
7538 * If for some reason, copy_from_user() always causes a context
7539 * switch, this would then cause an infinite loop.
7540 * If this task is preempted by another user space task, it
7541 * will cause this task to try again. But just in case something
7542 * changes where the copying from user space causes another task
7543 * to run, prevent this from going into an infinite loop.
7544 * 100 tries should be plenty.
7545 */
7546 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7547 return NULL;
7548
7549 /* Read the current CPU context switch counter */
7550 cnt = nr_context_switches_cpu(cpu);
7551
7552 /*
7553 * Preemption is going to be enabled, but this task must
7554 * remain on this CPU.
7555 */
7556 migrate_disable();
7557
7558 /*
7559 * Now preemption is being enabled and another task can come in
7560 * and use the same buffer and corrupt our data.
7561 */
7562 preempt_enable_notrace();
7563
7564 /* Make sure preemption is enabled here */
7565 lockdep_assert_preemption_enabled();
7566
7567 if (copy_func) {
7568 ret = copy_func(buffer, ptr, size, data);
7569 } else {
7570 ret = __copy_from_user(buffer, ptr, size);
7571 }
7572
7573 preempt_disable_notrace();
7574 migrate_enable();
7575
7576 /* if it faulted, no need to test if the buffer was corrupted */
7577 if (ret)
7578 return NULL;
7579
7580 /*
7581 * Preemption is disabled again, now check the per CPU context
7582 * switch counter. If it doesn't match, then another user space
7583 * process may have scheduled in and corrupted our buffer. In that
7584 * case the copying must be retried.
7585 */
7586 } while (nr_context_switches_cpu(cpu) != cnt);
7587
7588 return buffer;
7589 }
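
/*
 * Typical read pattern (a minimal sketch; tracing_mark_write() below is
 * the in-tree caller this is modeled on):
 *
 *	guard(preempt_notrace)();
 *	buf = trace_user_fault_read(tinfo, ubuf, cnt, NULL, NULL);
 *	if (!buf)
 *		return -EFAULT;
 *
 * The returned buffer stays valid only while preemption remains disabled.
 */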
7590
7591 static ssize_t
7592 tracing_mark_write(struct file *filp, const char __user *ubuf,
7593 size_t cnt, loff_t *fpos)
7594 {
7595 struct trace_array *tr = filp->private_data;
7596 ssize_t written = -ENODEV;
7597 unsigned long ip;
7598 char *buf;
7599
7600 if (tracing_disabled)
7601 return -EINVAL;
7602
7603 if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7604 return -EINVAL;
7605
7606 if ((ssize_t)cnt < 0)
7607 return -EINVAL;
7608
7609 if (cnt > TRACE_MARKER_MAX_SIZE)
7610 cnt = TRACE_MARKER_MAX_SIZE;
7611
7612 /* Must have preemption disabled while having access to the buffer */
7613 guard(preempt_notrace)();
7614
7615 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7616 if (!buf)
7617 return -EFAULT;
7618
7619 /* The selftests expect this function to be the IP address */
7620 ip = _THIS_IP_;
7621
7622 /* The global trace_marker can go to multiple instances */
7623 if (tr == &global_trace) {
7624 guard(rcu)();
7625 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7626 written = write_marker_to_buffer(tr, buf, cnt, ip);
7627 if (written < 0)
7628 break;
7629 }
7630 } else {
7631 written = write_marker_to_buffer(tr, buf, cnt, ip);
7632 }
7633
7634 return written;
7635 }
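
/*
 * From user space this path is driven by a plain write to the
 * trace_marker file, e.g. (shell sketch, assuming the usual tracefs
 * mount point):
 *
 *	echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */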
7636
7637 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7638 const char *buf, size_t cnt)
7639 {
7640 struct ring_buffer_event *event;
7641 struct trace_buffer *buffer;
7642 struct raw_data_entry *entry;
7643 ssize_t written;
7644 size_t size;
7645
7646 /* cnt includes both the entry->id and the data behind it. */
7647 size = struct_offset(entry, id) + cnt;
7648
7649 buffer = tr->array_buffer.buffer;
7650
7651 if (size > ring_buffer_max_event_size(buffer))
7652 return -EINVAL;
7653
7654 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7655 tracing_gen_ctx());
7656 if (!event)
7657 /* Ring buffer disabled, return as if not open for write */
7658 return -EBADF;
7659
7660 entry = ring_buffer_event_data(event);
7661 unsafe_memcpy(&entry->id, buf, cnt,
7662 "id and content already reserved on ring buffer"
7663 "'buf' includes the 'id' and the data."
7664 "'entry' was allocated with cnt from 'id'.");
7665 written = cnt;
7666
7667 __buffer_unlock_commit(buffer, event);
7668
7669 return written;
7670 }
7671
7672 static ssize_t
7673 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7674 size_t cnt, loff_t *fpos)
7675 {
7676 struct trace_array *tr = filp->private_data;
7677 ssize_t written = -ENODEV;
7678 char *buf;
7679
7680 if (tracing_disabled)
7681 return -EINVAL;
7682
7683 if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7684 return -EINVAL;
7685
7686 /* The marker must at least have a tag id */
7687 if (cnt < sizeof(unsigned int))
7688 return -EINVAL;
7689
7690 /* raw write is all or nothing */
7691 if (cnt > TRACE_MARKER_MAX_SIZE)
7692 return -EINVAL;
7693
7694 /* Must have preemption disabled while having access to the buffer */
7695 guard(preempt_notrace)();
7696
7697 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7698 if (!buf)
7699 return -EFAULT;
7700
7701 /* The global trace_marker_raw can go to multiple instances */
7702 if (tr == &global_trace) {
7703 guard(rcu)();
7704 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7705 written = write_raw_marker_to_buffer(tr, buf, cnt);
7706 if (written < 0)
7707 break;
7708 }
7709 } else {
7710 written = write_raw_marker_to_buffer(tr, buf, cnt);
7711 }
7712
7713 return written;
7714 }
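
/*
 * trace_marker_raw expects a binary record that starts with an unsigned
 * int tag id followed by the payload. A user-space sketch (the path and
 * lack of error handling are assumptions):
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 */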
7715
7716 static int tracing_mark_open(struct inode *inode, struct file *filp)
7717 {
7718 int ret;
7719
7720 scoped_guard(mutex, &trace_user_buffer_mutex) {
7721 if (!trace_user_buffer) {
7722 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
7723 if (ret < 0)
7724 return ret;
7725 } else {
7726 trace_user_buffer->ref++;
7727 }
7728 }
7729
7730 stream_open(inode, filp);
7731 ret = tracing_open_generic_tr(inode, filp);
7732 if (ret < 0)
7733 user_buffer_put(&trace_user_buffer);
7734 return ret;
7735 }
7736
7737 static int tracing_mark_release(struct inode *inode, struct file *file)
7738 {
7739 user_buffer_put(&trace_user_buffer);
7740 return tracing_release_generic_tr(inode, file);
7741 }
7742
7743 static int tracing_clock_show(struct seq_file *m, void *v)
7744 {
7745 struct trace_array *tr = m->private;
7746 int i;
7747
7748 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7749 seq_printf(m,
7750 "%s%s%s%s", i ? " " : "",
7751 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7752 i == tr->clock_id ? "]" : "");
7753 seq_putc(m, '\n');
7754
7755 return 0;
7756 }
7757
7758 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7759 {
7760 int i;
7761
7762 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7763 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7764 break;
7765 }
7766 if (i == ARRAY_SIZE(trace_clocks))
7767 return -EINVAL;
7768
7769 guard(mutex)(&trace_types_lock);
7770
7771 tr->clock_id = i;
7772
7773 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7774
7775 /*
7776 * New clock may not be consistent with the previous clock.
7777 * Reset the buffer so that it doesn't have incomparable timestamps.
7778 */
7779 tracing_reset_online_cpus(&tr->array_buffer);
7780
7781 #ifdef CONFIG_TRACER_MAX_TRACE
7782 if (tr->max_buffer.buffer)
7783 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7784 tracing_reset_online_cpus(&tr->max_buffer);
7785 #endif
7786
7787 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7788 struct trace_scratch *tscratch = tr->scratch;
7789
7790 tscratch->clock_id = i;
7791 }
7792
7793 return 0;
7794 }
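
/*
 * The same path is exercised from user space by writing one of the
 * trace_clocks names to the trace_clock file, e.g. (shell sketch,
 * "mono" being one of the commonly available clocks):
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */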
7795
7796 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7797 size_t cnt, loff_t *fpos)
7798 {
7799 struct seq_file *m = filp->private_data;
7800 struct trace_array *tr = m->private;
7801 char buf[64];
7802 const char *clockstr;
7803 int ret;
7804
7805 if (cnt >= sizeof(buf))
7806 return -EINVAL;
7807
7808 if (copy_from_user(buf, ubuf, cnt))
7809 return -EFAULT;
7810
7811 buf[cnt] = 0;
7812
7813 clockstr = strstrip(buf);
7814
7815 ret = tracing_set_clock(tr, clockstr);
7816 if (ret)
7817 return ret;
7818
7819 *fpos += cnt;
7820
7821 return cnt;
7822 }
7823
7824 static int tracing_clock_open(struct inode *inode, struct file *file)
7825 {
7826 struct trace_array *tr = inode->i_private;
7827 int ret;
7828
7829 ret = tracing_check_open_get_tr(tr);
7830 if (ret)
7831 return ret;
7832
7833 ret = single_open(file, tracing_clock_show, inode->i_private);
7834 if (ret < 0)
7835 trace_array_put(tr);
7836
7837 return ret;
7838 }
7839
7840 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7841 {
7842 struct trace_array *tr = m->private;
7843
7844 guard(mutex)(&trace_types_lock);
7845
7846 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7847 seq_puts(m, "delta [absolute]\n");
7848 else
7849 seq_puts(m, "[delta] absolute\n");
7850
7851 return 0;
7852 }
7853
7854 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7855 {
7856 struct trace_array *tr = inode->i_private;
7857 int ret;
7858
7859 ret = tracing_check_open_get_tr(tr);
7860 if (ret)
7861 return ret;
7862
7863 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7864 if (ret < 0)
7865 trace_array_put(tr);
7866
7867 return ret;
7868 }
7869
7870 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7871 {
7872 if (rbe == this_cpu_read(trace_buffered_event))
7873 return ring_buffer_time_stamp(buffer);
7874
7875 return ring_buffer_event_time_stamp(buffer, rbe);
7876 }
7877
7878 /*
7879 * Set or disable using the per CPU trace_buffered_event when possible.
7880 */
7881 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7882 {
7883 guard(mutex)(&trace_types_lock);
7884
7885 if (set && tr->no_filter_buffering_ref++)
7886 return 0;
7887
7888 if (!set) {
7889 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7890 return -EINVAL;
7891
7892 --tr->no_filter_buffering_ref;
7893 }
7894
7895 return 0;
7896 }
7897
7898 struct ftrace_buffer_info {
7899 struct trace_iterator iter;
7900 void *spare;
7901 unsigned int spare_cpu;
7902 unsigned int spare_size;
7903 unsigned int read;
7904 };
7905
7906 #ifdef CONFIG_TRACER_SNAPSHOT
7907 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7908 {
7909 struct trace_array *tr = inode->i_private;
7910 struct trace_iterator *iter;
7911 struct seq_file *m;
7912 int ret;
7913
7914 ret = tracing_check_open_get_tr(tr);
7915 if (ret)
7916 return ret;
7917
7918 if (file->f_mode & FMODE_READ) {
7919 iter = __tracing_open(inode, file, true);
7920 if (IS_ERR(iter))
7921 ret = PTR_ERR(iter);
7922 } else {
7923 /* Writes still need the seq_file to hold the private data */
7924 ret = -ENOMEM;
7925 m = kzalloc(sizeof(*m), GFP_KERNEL);
7926 if (!m)
7927 goto out;
7928 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7929 if (!iter) {
7930 kfree(m);
7931 goto out;
7932 }
7933 ret = 0;
7934
7935 iter->tr = tr;
7936 iter->array_buffer = &tr->max_buffer;
7937 iter->cpu_file = tracing_get_cpu(inode);
7938 m->private = iter;
7939 file->private_data = m;
7940 }
7941 out:
7942 if (ret < 0)
7943 trace_array_put(tr);
7944
7945 return ret;
7946 }
7947
7948 static void tracing_swap_cpu_buffer(void *tr)
7949 {
7950 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7951 }
7952
7953 static ssize_t
7954 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7955 loff_t *ppos)
7956 {
7957 struct seq_file *m = filp->private_data;
7958 struct trace_iterator *iter = m->private;
7959 struct trace_array *tr = iter->tr;
7960 unsigned long val;
7961 int ret;
7962
7963 ret = tracing_update_buffers(tr);
7964 if (ret < 0)
7965 return ret;
7966
7967 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7968 if (ret)
7969 return ret;
7970
7971 guard(mutex)(&trace_types_lock);
7972
7973 if (tr->current_trace->use_max_tr)
7974 return -EBUSY;
7975
7976 local_irq_disable();
7977 arch_spin_lock(&tr->max_lock);
7978 if (tr->cond_snapshot)
7979 ret = -EBUSY;
7980 arch_spin_unlock(&tr->max_lock);
7981 local_irq_enable();
7982 if (ret)
7983 return ret;
7984
7985 switch (val) {
7986 case 0:
7987 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7988 return -EINVAL;
7989 if (tr->allocated_snapshot)
7990 free_snapshot(tr);
7991 break;
7992 case 1:
7993 /* Only allow per-cpu swap if the ring buffer supports it */
7994 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7995 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7996 return -EINVAL;
7997 #endif
7998 if (tr->allocated_snapshot)
7999 ret = resize_buffer_duplicate_size(&tr->max_buffer,
8000 &tr->array_buffer, iter->cpu_file);
8001
8002 ret = tracing_arm_snapshot_locked(tr);
8003 if (ret)
8004 return ret;
8005
8006 /* Now, we're going to swap */
8007 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
8008 local_irq_disable();
8009 update_max_tr(tr, current, smp_processor_id(), NULL);
8010 local_irq_enable();
8011 } else {
8012 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
8013 (void *)tr, 1);
8014 }
8015 tracing_disarm_snapshot(tr);
8016 break;
8017 default:
8018 if (tr->allocated_snapshot) {
8019 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
8020 tracing_reset_online_cpus(&tr->max_buffer);
8021 else
8022 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
8023 }
8024 break;
8025 }
8026
8027 if (ret >= 0) {
8028 *ppos += cnt;
8029 ret = cnt;
8030 }
8031
8032 return ret;
8033 }
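
/*
 * Summary of the values handled above, as seen from the snapshot file
 * in tracefs (a sketch of the usual usage):
 *
 *	echo 0 > snapshot	# free the snapshot buffer (all-CPU file only)
 *	echo 1 > snapshot	# allocate if needed and swap in a snapshot
 *	echo 2 > snapshot	# any other value just clears the snapshot buffer
 */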
8034
8035 static int tracing_snapshot_release(struct inode *inode, struct file *file)
8036 {
8037 struct seq_file *m = file->private_data;
8038 int ret;
8039
8040 ret = tracing_release(inode, file);
8041
8042 if (file->f_mode & FMODE_READ)
8043 return ret;
8044
8045 /* If write only, the seq_file is just a stub */
8046 if (m)
8047 kfree(m->private);
8048 kfree(m);
8049
8050 return 0;
8051 }
8052
8053 static int tracing_buffers_open(struct inode *inode, struct file *filp);
8054 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
8055 size_t count, loff_t *ppos);
8056 static int tracing_buffers_release(struct inode *inode, struct file *file);
8057 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8058 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
8059
8060 static int snapshot_raw_open(struct inode *inode, struct file *filp)
8061 {
8062 struct ftrace_buffer_info *info;
8063 int ret;
8064
8065 /* The following checks for tracefs lockdown */
8066 ret = tracing_buffers_open(inode, filp);
8067 if (ret < 0)
8068 return ret;
8069
8070 info = filp->private_data;
8071
8072 if (info->iter.trace->use_max_tr) {
8073 tracing_buffers_release(inode, filp);
8074 return -EBUSY;
8075 }
8076
8077 info->iter.snapshot = true;
8078 info->iter.array_buffer = &info->iter.tr->max_buffer;
8079
8080 return ret;
8081 }
8082
8083 #endif /* CONFIG_TRACER_SNAPSHOT */
8084
8085
8086 static const struct file_operations tracing_thresh_fops = {
8087 .open = tracing_open_generic,
8088 .read = tracing_thresh_read,
8089 .write = tracing_thresh_write,
8090 .llseek = generic_file_llseek,
8091 };
8092
8093 #ifdef CONFIG_TRACER_MAX_TRACE
8094 static const struct file_operations tracing_max_lat_fops = {
8095 .open = tracing_open_generic_tr,
8096 .read = tracing_max_lat_read,
8097 .write = tracing_max_lat_write,
8098 .llseek = generic_file_llseek,
8099 .release = tracing_release_generic_tr,
8100 };
8101 #endif
8102
8103 static const struct file_operations set_tracer_fops = {
8104 .open = tracing_open_generic_tr,
8105 .read = tracing_set_trace_read,
8106 .write = tracing_set_trace_write,
8107 .llseek = generic_file_llseek,
8108 .release = tracing_release_generic_tr,
8109 };
8110
8111 static const struct file_operations tracing_pipe_fops = {
8112 .open = tracing_open_pipe,
8113 .poll = tracing_poll_pipe,
8114 .read = tracing_read_pipe,
8115 .splice_read = tracing_splice_read_pipe,
8116 .release = tracing_release_pipe,
8117 };
8118
8119 static const struct file_operations tracing_entries_fops = {
8120 .open = tracing_open_generic_tr,
8121 .read = tracing_entries_read,
8122 .write = tracing_entries_write,
8123 .llseek = generic_file_llseek,
8124 .release = tracing_release_generic_tr,
8125 };
8126
8127 static const struct file_operations tracing_syscall_buf_fops = {
8128 .open = tracing_open_generic_tr,
8129 .read = tracing_syscall_buf_read,
8130 .write = tracing_syscall_buf_write,
8131 .llseek = generic_file_llseek,
8132 .release = tracing_release_generic_tr,
8133 };
8134
8135 static const struct file_operations tracing_buffer_meta_fops = {
8136 .open = tracing_buffer_meta_open,
8137 .read = seq_read,
8138 .llseek = seq_lseek,
8139 .release = tracing_seq_release,
8140 };
8141
8142 static const struct file_operations tracing_total_entries_fops = {
8143 .open = tracing_open_generic_tr,
8144 .read = tracing_total_entries_read,
8145 .llseek = generic_file_llseek,
8146 .release = tracing_release_generic_tr,
8147 };
8148
8149 static const struct file_operations tracing_free_buffer_fops = {
8150 .open = tracing_open_generic_tr,
8151 .write = tracing_free_buffer_write,
8152 .release = tracing_free_buffer_release,
8153 };
8154
8155 static const struct file_operations tracing_mark_fops = {
8156 .open = tracing_mark_open,
8157 .write = tracing_mark_write,
8158 .release = tracing_mark_release,
8159 };
8160
8161 static const struct file_operations tracing_mark_raw_fops = {
8162 .open = tracing_mark_open,
8163 .write = tracing_mark_raw_write,
8164 .release = tracing_mark_release,
8165 };
8166
8167 static const struct file_operations trace_clock_fops = {
8168 .open = tracing_clock_open,
8169 .read = seq_read,
8170 .llseek = seq_lseek,
8171 .release = tracing_single_release_tr,
8172 .write = tracing_clock_write,
8173 };
8174
8175 static const struct file_operations trace_time_stamp_mode_fops = {
8176 .open = tracing_time_stamp_mode_open,
8177 .read = seq_read,
8178 .llseek = seq_lseek,
8179 .release = tracing_single_release_tr,
8180 };
8181
8182 static const struct file_operations last_boot_fops = {
8183 .open = tracing_last_boot_open,
8184 .read = seq_read,
8185 .llseek = seq_lseek,
8186 .release = tracing_seq_release,
8187 };
8188
8189 #ifdef CONFIG_TRACER_SNAPSHOT
8190 static const struct file_operations snapshot_fops = {
8191 .open = tracing_snapshot_open,
8192 .read = seq_read,
8193 .write = tracing_snapshot_write,
8194 .llseek = tracing_lseek,
8195 .release = tracing_snapshot_release,
8196 };
8197
8198 static const struct file_operations snapshot_raw_fops = {
8199 .open = snapshot_raw_open,
8200 .read = tracing_buffers_read,
8201 .release = tracing_buffers_release,
8202 .splice_read = tracing_buffers_splice_read,
8203 };
8204
8205 #endif /* CONFIG_TRACER_SNAPSHOT */
8206
8207 /*
8208 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
8209 * @filp: The active open file structure
8210 * @ubuf: The userspace provided buffer holding the value to write
8211 * @cnt: The number of bytes to read from @ubuf
8212 * @ppos: The current "file" position
8213 *
8214 * This function implements the write interface for a struct trace_min_max_param.
8215 * The filp->private_data must point to a trace_min_max_param structure that
8216 * defines where to write the value, the min and the max acceptable values,
8217 * and a lock to protect the write.
8218 */
8219 static ssize_t
8220 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8221 {
8222 struct trace_min_max_param *param = filp->private_data;
8223 u64 val;
8224 int err;
8225
8226 if (!param)
8227 return -EFAULT;
8228
8229 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8230 if (err)
8231 return err;
8232
8233 if (param->lock)
8234 mutex_lock(param->lock);
8235
8236 if (param->min && val < *param->min)
8237 err = -EINVAL;
8238
8239 if (param->max && val > *param->max)
8240 err = -EINVAL;
8241
8242 if (!err)
8243 *param->val = val;
8244
8245 if (param->lock)
8246 mutex_unlock(param->lock);
8247
8248 if (err)
8249 return err;
8250
8251 return cnt;
8252 }
8253
8254 /*
8255 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8256 * @filp: The active open file structure
8257 * @ubuf: The userspace provided buffer to read value into
8258 * @cnt: The maximum number of bytes to read
8259 * @ppos: The current "file" position
8260 *
8261 * This function implements the read interface for a struct trace_min_max_param.
8262 * The filp->private_data must point to a trace_min_max_param struct with valid
8263 * data.
8264 */
8265 static ssize_t
8266 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8267 {
8268 struct trace_min_max_param *param = filp->private_data;
8269 char buf[U64_STR_SIZE];
8270 int len;
8271 u64 val;
8272
8273 if (!param)
8274 return -EFAULT;
8275
8276 val = *param->val;
8277
8278 if (cnt > sizeof(buf))
8279 cnt = sizeof(buf);
8280
8281 len = snprintf(buf, sizeof(buf), "%llu\n", val);
8282
8283 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8284 }
8285
8286 const struct file_operations trace_min_max_fops = {
8287 .open = tracing_open_generic,
8288 .read = trace_min_max_read,
8289 .write = trace_min_max_write,
8290 };
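
/*
 * A minimal wiring sketch for trace_min_max_fops (the names, the mutex
 * and the trace_create_file() call site are assumptions; the struct
 * fields mirror how the handlers above use them):
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */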
8291
8292 #define TRACING_LOG_ERRS_MAX 8
8293 #define TRACING_LOG_LOC_MAX 128
8294
8295 #define CMD_PREFIX " Command: "
8296
8297 struct err_info {
8298 const char **errs; /* ptr to loc-specific array of err strings */
8299 u8 type; /* index into errs -> specific err string */
8300 u16 pos; /* caret position */
8301 u64 ts;
8302 };
8303
8304 struct tracing_log_err {
8305 struct list_head list;
8306 struct err_info info;
8307 char loc[TRACING_LOG_LOC_MAX]; /* err location */
8308 char *cmd; /* what caused err */
8309 };
8310
8311 static DEFINE_MUTEX(tracing_err_log_lock);
8312
8313 static struct tracing_log_err *alloc_tracing_log_err(int len)
8314 {
8315 struct tracing_log_err *err;
8316
8317 err = kzalloc(sizeof(*err), GFP_KERNEL);
8318 if (!err)
8319 return ERR_PTR(-ENOMEM);
8320
8321 err->cmd = kzalloc(len, GFP_KERNEL);
8322 if (!err->cmd) {
8323 kfree(err);
8324 return ERR_PTR(-ENOMEM);
8325 }
8326
8327 return err;
8328 }
8329
8330 static void free_tracing_log_err(struct tracing_log_err *err)
8331 {
8332 kfree(err->cmd);
8333 kfree(err);
8334 }
8335
8336 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8337 int len)
8338 {
8339 struct tracing_log_err *err;
8340 char *cmd;
8341
8342 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8343 err = alloc_tracing_log_err(len);
8344 if (PTR_ERR(err) != -ENOMEM)
8345 tr->n_err_log_entries++;
8346
8347 return err;
8348 }
8349 cmd = kzalloc(len, GFP_KERNEL);
8350 if (!cmd)
8351 return ERR_PTR(-ENOMEM);
8352 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8353 kfree(err->cmd);
8354 err->cmd = cmd;
8355 list_del(&err->list);
8356
8357 return err;
8358 }
8359
8360 /**
8361 * err_pos - find the position of a string within a command for error careting
8362 * @cmd: The tracing command that caused the error
8363 * @str: The string to position the caret at within @cmd
8364 *
8365 * Finds the position of the first occurrence of @str within @cmd. The
8366 * return value can be passed to tracing_log_err() for caret placement
8367 * within @cmd.
8368 *
8369 * Returns the index within @cmd of the first occurrence of @str or 0
8370 * if @str was not found.
8371 */
8372 unsigned int err_pos(char *cmd, const char *str)
8373 {
8374 char *found;
8375
8376 if (WARN_ON(!strlen(cmd)))
8377 return 0;
8378
8379 found = strstr(cmd, str);
8380 if (found)
8381 return found - cmd;
8382
8383 return 0;
8384 }
8385
8386 /**
8387 * tracing_log_err - write an error to the tracing error log
8388 * @tr: The associated trace array for the error (NULL for top level array)
8389 * @loc: A string describing where the error occurred
8390 * @cmd: The tracing command that caused the error
8391 * @errs: The array of loc-specific static error strings
8392 * @type: The index into errs[], which produces the specific static err string
8393 * @pos: The position the caret should be placed in the cmd
8394 *
8395 * Writes an error into tracing/error_log of the form:
8396 *
8397 * <loc>: error: <text>
8398 * Command: <cmd>
8399 * ^
8400 *
8401 * tracing/error_log is a small log file containing the last
8402 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8403 * unless there has been a tracing error, and the error log can be
8404 * cleared and have its memory freed by writing the empty string in
8405 * truncation mode to it i.e. echo > tracing/error_log.
8406 *
8407 * NOTE: the @errs array along with the @type param are used to
8408 * produce a static error string - this string is not copied and saved
8409 * when the error is logged - only a pointer to it is saved. See
8410 * existing callers for examples of how static strings are typically
8411 * defined for use with tracing_log_err().
8412 */
8413 void tracing_log_err(struct trace_array *tr,
8414 const char *loc, const char *cmd,
8415 const char **errs, u8 type, u16 pos)
8416 {
8417 struct tracing_log_err *err;
8418 int len = 0;
8419
8420 if (!tr)
8421 tr = &global_trace;
8422
8423 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8424
8425 guard(mutex)(&tracing_err_log_lock);
8426
8427 err = get_tracing_log_err(tr, len);
8428 if (PTR_ERR(err) == -ENOMEM)
8429 return;
8430
8431 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8432 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8433
8434 err->info.errs = errs;
8435 err->info.type = type;
8436 err->info.pos = pos;
8437 err->info.ts = local_clock();
8438
8439 list_add_tail(&err->list, &tr->err_log);
8440 }
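
/*
 * Typical caller pattern (a sketch modeled on existing users; the error
 * table, the location string and the command are made up):
 *
 *	static const char *my_errs[] = { "Bad argument", "Duplicate name" };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			0, err_pos(cmd, "badarg"));
 */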
8441
8442 static void clear_tracing_err_log(struct trace_array *tr)
8443 {
8444 struct tracing_log_err *err, *next;
8445
8446 guard(mutex)(&tracing_err_log_lock);
8447
8448 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8449 list_del(&err->list);
8450 free_tracing_log_err(err);
8451 }
8452
8453 tr->n_err_log_entries = 0;
8454 }
8455
8456 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8457 {
8458 struct trace_array *tr = m->private;
8459
8460 mutex_lock(&tracing_err_log_lock);
8461
8462 return seq_list_start(&tr->err_log, *pos);
8463 }
8464
8465 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8466 {
8467 struct trace_array *tr = m->private;
8468
8469 return seq_list_next(v, &tr->err_log, pos);
8470 }
8471
8472 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8473 {
8474 mutex_unlock(&tracing_err_log_lock);
8475 }
8476
8477 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8478 {
8479 u16 i;
8480
8481 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8482 seq_putc(m, ' ');
8483 for (i = 0; i < pos; i++)
8484 seq_putc(m, ' ');
8485 seq_puts(m, "^\n");
8486 }
8487
8488 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8489 {
8490 struct tracing_log_err *err = v;
8491
8492 if (err) {
8493 const char *err_text = err->info.errs[err->info.type];
8494 u64 sec = err->info.ts;
8495 u32 nsec;
8496
8497 nsec = do_div(sec, NSEC_PER_SEC);
8498 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8499 err->loc, err_text);
8500 seq_printf(m, "%s", err->cmd);
8501 tracing_err_log_show_pos(m, err->info.pos);
8502 }
8503
8504 return 0;
8505 }
8506
8507 static const struct seq_operations tracing_err_log_seq_ops = {
8508 .start = tracing_err_log_seq_start,
8509 .next = tracing_err_log_seq_next,
8510 .stop = tracing_err_log_seq_stop,
8511 .show = tracing_err_log_seq_show
8512 };
8513
8514 static int tracing_err_log_open(struct inode *inode, struct file *file)
8515 {
8516 struct trace_array *tr = inode->i_private;
8517 int ret = 0;
8518
8519 ret = tracing_check_open_get_tr(tr);
8520 if (ret)
8521 return ret;
8522
8523 /* If this file was opened for write, then erase contents */
8524 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8525 clear_tracing_err_log(tr);
8526
8527 if (file->f_mode & FMODE_READ) {
8528 ret = seq_open(file, &tracing_err_log_seq_ops);
8529 if (!ret) {
8530 struct seq_file *m = file->private_data;
8531 m->private = tr;
8532 } else {
8533 trace_array_put(tr);
8534 }
8535 }
8536 return ret;
8537 }
8538
8539 static ssize_t tracing_err_log_write(struct file *file,
8540 const char __user *buffer,
8541 size_t count, loff_t *ppos)
8542 {
8543 return count;
8544 }
8545
8546 static int tracing_err_log_release(struct inode *inode, struct file *file)
8547 {
8548 struct trace_array *tr = inode->i_private;
8549
8550 trace_array_put(tr);
8551
8552 if (file->f_mode & FMODE_READ)
8553 seq_release(inode, file);
8554
8555 return 0;
8556 }
8557
8558 static const struct file_operations tracing_err_log_fops = {
8559 .open = tracing_err_log_open,
8560 .write = tracing_err_log_write,
8561 .read = seq_read,
8562 .llseek = tracing_lseek,
8563 .release = tracing_err_log_release,
8564 };
8565
8566 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8567 {
8568 struct trace_array *tr = inode->i_private;
8569 struct ftrace_buffer_info *info;
8570 int ret;
8571
8572 ret = tracing_check_open_get_tr(tr);
8573 if (ret)
8574 return ret;
8575
8576 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8577 if (!info) {
8578 trace_array_put(tr);
8579 return -ENOMEM;
8580 }
8581
8582 mutex_lock(&trace_types_lock);
8583
8584 info->iter.tr = tr;
8585 info->iter.cpu_file = tracing_get_cpu(inode);
8586 info->iter.trace = tr->current_trace;
8587 info->iter.array_buffer = &tr->array_buffer;
8588 info->spare = NULL;
8589 /* Force reading ring buffer for first read */
8590 info->read = (unsigned int)-1;
8591
8592 filp->private_data = info;
8593
8594 tr->trace_ref++;
8595
8596 mutex_unlock(&trace_types_lock);
8597
8598 ret = nonseekable_open(inode, filp);
8599 if (ret < 0)
8600 trace_array_put(tr);
8601
8602 return ret;
8603 }
8604
8605 static __poll_t
8606 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8607 {
8608 struct ftrace_buffer_info *info = filp->private_data;
8609 struct trace_iterator *iter = &info->iter;
8610
8611 return trace_poll(iter, filp, poll_table);
8612 }
8613
8614 static ssize_t
8615 tracing_buffers_read(struct file *filp, char __user *ubuf,
8616 size_t count, loff_t *ppos)
8617 {
8618 struct ftrace_buffer_info *info = filp->private_data;
8619 struct trace_iterator *iter = &info->iter;
8620 void *trace_data;
8621 int page_size;
8622 ssize_t ret = 0;
8623 ssize_t size;
8624
8625 if (!count)
8626 return 0;
8627
8628 #ifdef CONFIG_TRACER_MAX_TRACE
8629 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8630 return -EBUSY;
8631 #endif
8632
8633 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8634
8635 /* Make sure the spare matches the current sub buffer size */
8636 if (info->spare) {
8637 if (page_size != info->spare_size) {
8638 ring_buffer_free_read_page(iter->array_buffer->buffer,
8639 info->spare_cpu, info->spare);
8640 info->spare = NULL;
8641 }
8642 }
8643
8644 if (!info->spare) {
8645 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8646 iter->cpu_file);
8647 if (IS_ERR(info->spare)) {
8648 ret = PTR_ERR(info->spare);
8649 info->spare = NULL;
8650 } else {
8651 info->spare_cpu = iter->cpu_file;
8652 info->spare_size = page_size;
8653 }
8654 }
8655 if (!info->spare)
8656 return ret;
8657
8658 /* Do we have previous read data to read? */
8659 if (info->read < page_size)
8660 goto read;
8661
8662 again:
8663 trace_access_lock(iter->cpu_file);
8664 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8665 info->spare,
8666 count,
8667 iter->cpu_file, 0);
8668 trace_access_unlock(iter->cpu_file);
8669
8670 if (ret < 0) {
8671 if (trace_empty(iter) && !iter->closed) {
8672 if (update_last_data_if_empty(iter->tr))
8673 return 0;
8674
8675 if ((filp->f_flags & O_NONBLOCK))
8676 return -EAGAIN;
8677
8678 ret = wait_on_pipe(iter, 0);
8679 if (ret)
8680 return ret;
8681
8682 goto again;
8683 }
8684 return 0;
8685 }
8686
8687 info->read = 0;
8688 read:
8689 size = page_size - info->read;
8690 if (size > count)
8691 size = count;
8692 trace_data = ring_buffer_read_page_data(info->spare);
8693 ret = copy_to_user(ubuf, trace_data + info->read, size);
8694 if (ret == size)
8695 return -EFAULT;
8696
8697 size -= ret;
8698
8699 *ppos += size;
8700 info->read += size;
8701
8702 return size;
8703 }
8704
8705 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8706 {
8707 struct ftrace_buffer_info *info = file->private_data;
8708 struct trace_iterator *iter = &info->iter;
8709
8710 iter->closed = true;
8711 /* Make sure the waiters see the new wait_index */
8712 (void)atomic_fetch_inc_release(&iter->wait_index);
8713
8714 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8715
8716 return 0;
8717 }
8718
8719 static int tracing_buffers_release(struct inode *inode, struct file *file)
8720 {
8721 struct ftrace_buffer_info *info = file->private_data;
8722 struct trace_iterator *iter = &info->iter;
8723
8724 guard(mutex)(&trace_types_lock);
8725
8726 iter->tr->trace_ref--;
8727
8728 __trace_array_put(iter->tr);
8729
8730 if (info->spare)
8731 ring_buffer_free_read_page(iter->array_buffer->buffer,
8732 info->spare_cpu, info->spare);
8733 kvfree(info);
8734
8735 return 0;
8736 }
8737
8738 struct buffer_ref {
8739 struct trace_buffer *buffer;
8740 void *page;
8741 int cpu;
8742 refcount_t refcount;
8743 };
8744
8745 static void buffer_ref_release(struct buffer_ref *ref)
8746 {
8747 if (!refcount_dec_and_test(&ref->refcount))
8748 return;
8749 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8750 kfree(ref);
8751 }
8752
8753 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8754 struct pipe_buffer *buf)
8755 {
8756 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8757
8758 buffer_ref_release(ref);
8759 buf->private = 0;
8760 }
8761
8762 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8763 struct pipe_buffer *buf)
8764 {
8765 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8766
8767 if (refcount_read(&ref->refcount) > INT_MAX/2)
8768 return false;
8769
8770 refcount_inc(&ref->refcount);
8771 return true;
8772 }
8773
8774 /* Pipe buffer operations for a buffer. */
8775 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8776 .release = buffer_pipe_buf_release,
8777 .get = buffer_pipe_buf_get,
8778 };
8779
8780 /*
8781 * Callback from splice_to_pipe(), if we need to release some pages
8782 * at the end of the spd in case we errored out while filling the pipe.
8783 */
8784 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8785 {
8786 struct buffer_ref *ref =
8787 (struct buffer_ref *)spd->partial[i].private;
8788
8789 buffer_ref_release(ref);
8790 spd->partial[i].private = 0;
8791 }
8792
8793 static ssize_t
8794 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8795 struct pipe_inode_info *pipe, size_t len,
8796 unsigned int flags)
8797 {
8798 struct ftrace_buffer_info *info = file->private_data;
8799 struct trace_iterator *iter = &info->iter;
8800 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8801 struct page *pages_def[PIPE_DEF_BUFFERS];
8802 struct splice_pipe_desc spd = {
8803 .pages = pages_def,
8804 .partial = partial_def,
8805 .nr_pages_max = PIPE_DEF_BUFFERS,
8806 .ops = &buffer_pipe_buf_ops,
8807 .spd_release = buffer_spd_release,
8808 };
8809 struct buffer_ref *ref;
8810 bool woken = false;
8811 int page_size;
8812 int entries, i;
8813 ssize_t ret = 0;
8814
8815 #ifdef CONFIG_TRACER_MAX_TRACE
8816 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8817 return -EBUSY;
8818 #endif
8819
8820 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8821 if (*ppos & (page_size - 1))
8822 return -EINVAL;
8823
8824 if (len & (page_size - 1)) {
8825 if (len < page_size)
8826 return -EINVAL;
8827 len &= (~(page_size - 1));
8828 }
8829
8830 if (splice_grow_spd(pipe, &spd))
8831 return -ENOMEM;
8832
8833 again:
8834 trace_access_lock(iter->cpu_file);
8835 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8836
8837 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8838 struct page *page;
8839 int r;
8840
8841 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8842 if (!ref) {
8843 ret = -ENOMEM;
8844 break;
8845 }
8846
8847 refcount_set(&ref->refcount, 1);
8848 ref->buffer = iter->array_buffer->buffer;
8849 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8850 if (IS_ERR(ref->page)) {
8851 ret = PTR_ERR(ref->page);
8852 ref->page = NULL;
8853 kfree(ref);
8854 break;
8855 }
8856 ref->cpu = iter->cpu_file;
8857
8858 r = ring_buffer_read_page(ref->buffer, ref->page,
8859 len, iter->cpu_file, 1);
8860 if (r < 0) {
8861 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8862 ref->page);
8863 kfree(ref);
8864 break;
8865 }
8866
8867 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8868
8869 spd.pages[i] = page;
8870 spd.partial[i].len = page_size;
8871 spd.partial[i].offset = 0;
8872 spd.partial[i].private = (unsigned long)ref;
8873 spd.nr_pages++;
8874 *ppos += page_size;
8875
8876 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8877 }
8878
8879 trace_access_unlock(iter->cpu_file);
8880 spd.nr_pages = i;
8881
8882 /* did we read anything? */
8883 if (!spd.nr_pages) {
8884
8885 if (ret)
8886 goto out;
8887
8888 if (woken)
8889 goto out;
8890
8891 ret = -EAGAIN;
8892 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8893 goto out;
8894
8895 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8896 if (ret)
8897 goto out;
8898
8899 /* No need to wait after waking up when tracing is off */
8900 if (!tracer_tracing_is_on(iter->tr))
8901 goto out;
8902
8903 /* Iterate one more time to collect any new data then exit */
8904 woken = true;
8905
8906 goto again;
8907 }
8908
8909 ret = splice_to_pipe(pipe, &spd);
8910 out:
8911 splice_shrink_spd(&spd);
8912
8913 return ret;
8914 }
8915
8916 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8917 {
8918 struct ftrace_buffer_info *info = file->private_data;
8919 struct trace_iterator *iter = &info->iter;
8920 int err;
8921
8922 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8923 if (!(file->f_flags & O_NONBLOCK)) {
8924 err = ring_buffer_wait(iter->array_buffer->buffer,
8925 iter->cpu_file,
8926 iter->tr->buffer_percent,
8927 NULL, NULL);
8928 if (err)
8929 return err;
8930 }
8931
8932 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8933 iter->cpu_file);
8934 } else if (cmd) {
8935 return -ENOTTY;
8936 }
8937
8938 /*
8939 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8940 * waiters
8941 */
8942 guard(mutex)(&trace_types_lock);
8943
8944 /* Make sure the waiters see the new wait_index */
8945 (void)atomic_fetch_inc_release(&iter->wait_index);
8946
8947 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8948
8949 return 0;
8950 }
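
/*
 * User space pairs this ioctl with mmap() of the per-CPU trace_pipe_raw
 * file to consume the memory-mapped ring buffer (a sketch; the ioctl
 * constant is assumed to come from the trace_mmap UAPI header):
 *
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 */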
8951
8952 #ifdef CONFIG_TRACER_MAX_TRACE
8953 static int get_snapshot_map(struct trace_array *tr)
8954 {
8955 int err = 0;
8956
8957 /*
8958 * Called with mmap_lock held. lockdep would be unhappy if we took
8959 * trace_types_lock here. Instead use the specific
8960 * snapshot_trigger_lock.
8961 */
8962 spin_lock(&tr->snapshot_trigger_lock);
8963
8964 if (tr->snapshot || tr->mapped == UINT_MAX)
8965 err = -EBUSY;
8966 else
8967 tr->mapped++;
8968
8969 spin_unlock(&tr->snapshot_trigger_lock);
8970
8971 /* Wait for update_max_tr() to observe iter->tr->mapped */
8972 if (tr->mapped == 1)
8973 synchronize_rcu();
8974
8975 return err;
8976
8977 }
8978 static void put_snapshot_map(struct trace_array *tr)
8979 {
8980 spin_lock(&tr->snapshot_trigger_lock);
8981 if (!WARN_ON(!tr->mapped))
8982 tr->mapped--;
8983 spin_unlock(&tr->snapshot_trigger_lock);
8984 }
8985 #else
8986 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8987 static inline void put_snapshot_map(struct trace_array *tr) { }
8988 #endif
8989
8990 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8991 {
8992 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8993 struct trace_iterator *iter = &info->iter;
8994
8995 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8996 put_snapshot_map(iter->tr);
8997 }
8998
8999 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
9000 {
9001 /*
9002 * Trace buffer mappings require the complete buffer including
9003 * the meta page. Partial mappings are not supported.
9004 */
9005 return -EINVAL;
9006 }
9007
9008 static const struct vm_operations_struct tracing_buffers_vmops = {
9009 .close = tracing_buffers_mmap_close,
9010 .may_split = tracing_buffers_may_split,
9011 };
9012
9013 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
9014 {
9015 struct ftrace_buffer_info *info = filp->private_data;
9016 struct trace_iterator *iter = &info->iter;
9017 int ret = 0;
9018
9019 /* Memory-mapped and backup buffers are not supported for user space mmap */
9020 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
9021 return -ENODEV;
9022
9023 ret = get_snapshot_map(iter->tr);
9024 if (ret)
9025 return ret;
9026
9027 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
9028 if (ret)
9029 put_snapshot_map(iter->tr);
9030
9031 vma->vm_ops = &tracing_buffers_vmops;
9032
9033 return ret;
9034 }
9035
9036 static const struct file_operations tracing_buffers_fops = {
9037 .open = tracing_buffers_open,
9038 .read = tracing_buffers_read,
9039 .poll = tracing_buffers_poll,
9040 .release = tracing_buffers_release,
9041 .flush = tracing_buffers_flush,
9042 .splice_read = tracing_buffers_splice_read,
9043 .unlocked_ioctl = tracing_buffers_ioctl,
9044 .mmap = tracing_buffers_mmap,
9045 };
9046
9047 static ssize_t
9048 tracing_stats_read(struct file *filp, char __user *ubuf,
9049 size_t count, loff_t *ppos)
9050 {
9051 struct inode *inode = file_inode(filp);
9052 struct trace_array *tr = inode->i_private;
9053 struct array_buffer *trace_buf = &tr->array_buffer;
9054 int cpu = tracing_get_cpu(inode);
9055 struct trace_seq *s;
9056 unsigned long cnt;
9057 unsigned long long t;
9058 unsigned long usec_rem;
9059
9060 s = kmalloc(sizeof(*s), GFP_KERNEL);
9061 if (!s)
9062 return -ENOMEM;
9063
9064 trace_seq_init(s);
9065
9066 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
9067 trace_seq_printf(s, "entries: %ld\n", cnt);
9068
9069 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
9070 trace_seq_printf(s, "overrun: %ld\n", cnt);
9071
9072 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
9073 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
9074
9075 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
9076 trace_seq_printf(s, "bytes: %ld\n", cnt);
9077
9078 if (trace_clocks[tr->clock_id].in_ns) {
9079 /* local or global for trace_clock */
9080 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9081 usec_rem = do_div(t, USEC_PER_SEC);
9082 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
9083 t, usec_rem);
9084
9085 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
9086 usec_rem = do_div(t, USEC_PER_SEC);
9087 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
9088 } else {
9089 /* counter or tsc mode for trace_clock */
9090 trace_seq_printf(s, "oldest event ts: %llu\n",
9091 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9092
9093 trace_seq_printf(s, "now ts: %llu\n",
9094 ring_buffer_time_stamp(trace_buf->buffer));
9095 }
9096
9097 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
9098 trace_seq_printf(s, "dropped events: %ld\n", cnt);
9099
9100 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
9101 trace_seq_printf(s, "read events: %ld\n", cnt);
9102
9103 count = simple_read_from_buffer(ubuf, count, ppos,
9104 s->buffer, trace_seq_used(s));
9105
9106 kfree(s);
9107
9108 return count;
9109 }
9110
9111 static const struct file_operations tracing_stats_fops = {
9112 .open = tracing_open_generic_tr,
9113 .read = tracing_stats_read,
9114 .llseek = generic_file_llseek,
9115 .release = tracing_release_generic_tr,
9116 };
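/*
 * The fops above back the per_cpu/cpuN/stats files. Example output
 * (illustrative values only; the format follows tracing_stats_read()):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts: 12345.678901
 *   now ts: 12346.000000
 *   dropped events: 0
 *   read events: 1024
 */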
9117
9118 #ifdef CONFIG_DYNAMIC_FTRACE
9119
9120 static ssize_t
9121 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
9122 size_t cnt, loff_t *ppos)
9123 {
9124 ssize_t ret;
9125 char *buf;
9126 int r;
9127
9128 /* 512 should be plenty to hold the amount needed */
9129 #define DYN_INFO_BUF_SIZE 512
9130
9131 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
9132 if (!buf)
9133 return -ENOMEM;
9134
9135 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
9136 "%ld pages:%ld groups: %ld\n"
9137 "ftrace boot update time = %llu (ns)\n"
9138 "ftrace module total update time = %llu (ns)\n",
9139 ftrace_update_tot_cnt,
9140 ftrace_number_of_pages,
9141 ftrace_number_of_groups,
9142 ftrace_update_time,
9143 ftrace_total_mod_time);
9144
9145 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9146 kfree(buf);
9147 return ret;
9148 }
9149
9150 static const struct file_operations tracing_dyn_info_fops = {
9151 .open = tracing_open_generic,
9152 .read = tracing_read_dyn_info,
9153 .llseek = generic_file_llseek,
9154 };
9155 #endif /* CONFIG_DYNAMIC_FTRACE */
9156
9157 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
9158 static void
9159 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
9160 struct trace_array *tr, struct ftrace_probe_ops *ops,
9161 void *data)
9162 {
9163 tracing_snapshot_instance(tr);
9164 }
9165
9166 static void
9167 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
9168 struct trace_array *tr, struct ftrace_probe_ops *ops,
9169 void *data)
9170 {
9171 struct ftrace_func_mapper *mapper = data;
9172 long *count = NULL;
9173
9174 if (mapper)
9175 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9176
9177 if (count) {
9178
9179 if (*count <= 0)
9180 return;
9181
9182 (*count)--;
9183 }
9184
9185 tracing_snapshot_instance(tr);
9186 }
9187
9188 static int
9189 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
9190 struct ftrace_probe_ops *ops, void *data)
9191 {
9192 struct ftrace_func_mapper *mapper = data;
9193 long *count = NULL;
9194
9195 seq_printf(m, "%ps:", (void *)ip);
9196
9197 seq_puts(m, "snapshot");
9198
9199 if (mapper)
9200 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9201
9202 if (count)
9203 seq_printf(m, ":count=%ld\n", *count);
9204 else
9205 seq_puts(m, ":unlimited\n");
9206
9207 return 0;
9208 }
9209
9210 static int
9211 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
9212 unsigned long ip, void *init_data, void **data)
9213 {
9214 struct ftrace_func_mapper *mapper = *data;
9215
9216 if (!mapper) {
9217 mapper = allocate_ftrace_func_mapper();
9218 if (!mapper)
9219 return -ENOMEM;
9220 *data = mapper;
9221 }
9222
9223 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
9224 }
9225
9226 static void
9227 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9228 unsigned long ip, void *data)
9229 {
9230 struct ftrace_func_mapper *mapper = data;
9231
9232 if (!ip) {
9233 if (!mapper)
9234 return;
9235 free_ftrace_func_mapper(mapper, NULL);
9236 return;
9237 }
9238
9239 ftrace_func_mapper_remove_ip(mapper, ip);
9240 }
9241
9242 static struct ftrace_probe_ops snapshot_probe_ops = {
9243 .func = ftrace_snapshot,
9244 .print = ftrace_snapshot_print,
9245 };
9246
9247 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9248 .func = ftrace_count_snapshot,
9249 .print = ftrace_snapshot_print,
9250 .init = ftrace_snapshot_init,
9251 .free = ftrace_snapshot_free,
9252 };
9253
9254 static int
9255 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9256 char *glob, char *cmd, char *param, int enable)
9257 {
9258 struct ftrace_probe_ops *ops;
9259 void *count = (void *)-1;
9260 char *number;
9261 int ret;
9262
9263 if (!tr)
9264 return -ENODEV;
9265
9266 /* hash funcs only work with set_ftrace_filter */
9267 if (!enable)
9268 return -EINVAL;
9269
9270 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
9271
9272 if (glob[0] == '!') {
9273 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9274 if (!ret)
9275 tracing_disarm_snapshot(tr);
9276
9277 return ret;
9278 }
9279
9280 if (!param)
9281 goto out_reg;
9282
9283 number = strsep(&param, ":");
9284
9285 if (!strlen(number))
9286 goto out_reg;
9287
9288 /*
9289 * We use the callback data field (which is a pointer)
9290 * as our counter.
9291 */
9292 ret = kstrtoul(number, 0, (unsigned long *)&count);
9293 if (ret)
9294 return ret;
9295
9296 out_reg:
9297 ret = tracing_arm_snapshot(tr);
9298 if (ret < 0)
9299 return ret;
9300
9301 ret = register_ftrace_function_probe(glob, tr, ops, count);
9302 if (ret < 0)
9303 tracing_disarm_snapshot(tr);
9304
9305 return ret < 0 ? ret : 0;
9306 }
9307
9308 static struct ftrace_func_command ftrace_snapshot_cmd = {
9309 .name = "snapshot",
9310 .func = ftrace_trace_snapshot_callback,
9311 };
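/*
 * Usage sketch for the "snapshot" command registered below (illustrative;
 * "schedule" is just an example function name):
 *
 *   echo 'schedule:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter   # only the first 5 hits
 *   echo '!schedule:snapshot'  > set_ftrace_filter   # remove the probe
 *
 * The optional count is parsed by ftrace_trace_snapshot_callback() above and
 * stored in the probe's data pointer.
 */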
9312
9313 static __init int register_snapshot_cmd(void)
9314 {
9315 return register_ftrace_command(&ftrace_snapshot_cmd);
9316 }
9317 #else
9318 static inline __init int register_snapshot_cmd(void) { return 0; }
9319 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9320
9321 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9322 {
9323 /* Top directory uses NULL as the parent */
9324 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9325 return NULL;
9326
9327 if (WARN_ON(!tr->dir))
9328 return ERR_PTR(-ENODEV);
9329
9330 /* All sub buffers have a descriptor */
9331 return tr->dir;
9332 }
9333
9334 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9335 {
9336 struct dentry *d_tracer;
9337
9338 if (tr->percpu_dir)
9339 return tr->percpu_dir;
9340
9341 d_tracer = tracing_get_dentry(tr);
9342 if (IS_ERR(d_tracer))
9343 return NULL;
9344
9345 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9346
9347 MEM_FAIL(!tr->percpu_dir,
9348 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9349
9350 return tr->percpu_dir;
9351 }
9352
9353 static struct dentry *
9354 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9355 void *data, long cpu, const struct file_operations *fops)
9356 {
9357 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9358
9359 if (ret) /* See tracing_get_cpu() */
9360 d_inode(ret)->i_cdev = (void *)(cpu + 1);
9361 return ret;
9362 }
9363
9364 static void
9365 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9366 {
9367 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9368 struct dentry *d_cpu;
9369 char cpu_dir[30]; /* 30 characters should be more than enough */
9370
9371 if (!d_percpu)
9372 return;
9373
9374 snprintf(cpu_dir, 30, "cpu%ld", cpu);
9375 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9376 if (!d_cpu) {
9377 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9378 return;
9379 }
9380
9381 /* per cpu trace_pipe */
9382 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9383 tr, cpu, &tracing_pipe_fops);
9384
9385 /* per cpu trace */
9386 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9387 tr, cpu, &tracing_fops);
9388
9389 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9390 tr, cpu, &tracing_buffers_fops);
9391
9392 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9393 tr, cpu, &tracing_stats_fops);
9394
9395 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9396 tr, cpu, &tracing_entries_fops);
9397
9398 if (tr->range_addr_start)
9399 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9400 tr, cpu, &tracing_buffer_meta_fops);
9401 #ifdef CONFIG_TRACER_SNAPSHOT
9402 if (!tr->range_addr_start) {
9403 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9404 tr, cpu, &snapshot_fops);
9405
9406 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9407 tr, cpu, &snapshot_raw_fops);
9408 }
9409 #endif
9410 }
9411
9412 #ifdef CONFIG_FTRACE_SELFTEST
9413 /* Let selftest have access to static functions in this file */
9414 #include "trace_selftest.c"
9415 #endif
9416
9417 static ssize_t
9418 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9419 loff_t *ppos)
9420 {
9421 struct trace_option_dentry *topt = filp->private_data;
9422 char *buf;
9423
9424 if (topt->flags->val & topt->opt->bit)
9425 buf = "1\n";
9426 else
9427 buf = "0\n";
9428
9429 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9430 }
9431
9432 static ssize_t
9433 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9434 loff_t *ppos)
9435 {
9436 struct trace_option_dentry *topt = filp->private_data;
9437 unsigned long val;
9438 int ret;
9439
9440 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9441 if (ret)
9442 return ret;
9443
9444 if (val != 0 && val != 1)
9445 return -EINVAL;
9446
9447 if (!!(topt->flags->val & topt->opt->bit) != val) {
9448 guard(mutex)(&trace_types_lock);
9449 ret = __set_tracer_option(topt->tr, topt->flags,
9450 topt->opt, !val);
9451 if (ret)
9452 return ret;
9453 }
9454
9455 *ppos += cnt;
9456
9457 return cnt;
9458 }
9459
9460 static int tracing_open_options(struct inode *inode, struct file *filp)
9461 {
9462 struct trace_option_dentry *topt = inode->i_private;
9463 int ret;
9464
9465 ret = tracing_check_open_get_tr(topt->tr);
9466 if (ret)
9467 return ret;
9468
9469 filp->private_data = inode->i_private;
9470 return 0;
9471 }
9472
9473 static int tracing_release_options(struct inode *inode, struct file *file)
9474 {
9475 struct trace_option_dentry *topt = file->private_data;
9476
9477 trace_array_put(topt->tr);
9478 return 0;
9479 }
9480
9481 static const struct file_operations trace_options_fops = {
9482 .open = tracing_open_options,
9483 .read = trace_options_read,
9484 .write = trace_options_write,
9485 .llseek = generic_file_llseek,
9486 .release = tracing_release_options,
9487 };
9488
9489 /*
9490 * In order to pass in both the trace_array descriptor as well as the index
9491 * to the flag that the trace option file represents, the trace_array
9492 * has a character array of trace_flags_index[], which holds the index
9493 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9494 * The address of this character array is passed to the flag option file
9495 * read/write callbacks.
9496 *
9497 * In order to extract both the index and the trace_array descriptor,
9498 * get_tr_index() uses the following algorithm.
9499 *
9500 * idx = *ptr;
9501 *
9502 * As the pointer itself points at the index entry whose value equals its
9503 * own position in the array (remember index[1] == 1).
9504 *
9505 * Then, to get the trace_array descriptor, subtracting that index from
9506 * the pointer gets us back to the start of the index array:
9507 *
9508 * ptr - idx == &index[0]
9509 *
9510 * Then a simple container_of() from that pointer gets us to the
9511 * trace_array descriptor.
9512 */
9513 static void get_tr_index(void *data, struct trace_array **ptr,
9514 unsigned int *pindex)
9515 {
9516 *pindex = *(unsigned char *)data;
9517
9518 *ptr = container_of(data - *pindex, struct trace_array,
9519 trace_flags_index);
9520 }
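/*
 * Worked example of the lookup above (hypothetical values): if the option
 * file was created with data == &tr->trace_flags_index[3], then
 * *data == 3, data - 3 == &tr->trace_flags_index[0], and container_of()
 * on that address recovers the enclosing struct trace_array.
 */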
9521
9522 static ssize_t
9523 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9524 loff_t *ppos)
9525 {
9526 void *tr_index = filp->private_data;
9527 struct trace_array *tr;
9528 unsigned int index;
9529 char *buf;
9530
9531 get_tr_index(tr_index, &tr, &index);
9532
9533 if (tr->trace_flags & (1ULL << index))
9534 buf = "1\n";
9535 else
9536 buf = "0\n";
9537
9538 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9539 }
9540
9541 static ssize_t
9542 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9543 loff_t *ppos)
9544 {
9545 void *tr_index = filp->private_data;
9546 struct trace_array *tr;
9547 unsigned int index;
9548 unsigned long val;
9549 int ret;
9550
9551 get_tr_index(tr_index, &tr, &index);
9552
9553 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9554 if (ret)
9555 return ret;
9556
9557 if (val != 0 && val != 1)
9558 return -EINVAL;
9559
9560 mutex_lock(&event_mutex);
9561 mutex_lock(&trace_types_lock);
9562 ret = set_tracer_flag(tr, 1ULL << index, val);
9563 mutex_unlock(&trace_types_lock);
9564 mutex_unlock(&event_mutex);
9565
9566 if (ret < 0)
9567 return ret;
9568
9569 *ppos += cnt;
9570
9571 return cnt;
9572 }
9573
9574 static const struct file_operations trace_options_core_fops = {
9575 .open = tracing_open_generic,
9576 .read = trace_options_core_read,
9577 .write = trace_options_core_write,
9578 .llseek = generic_file_llseek,
9579 };
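/*
 * These fops back the per-instance options/<flag> files. Usage sketch
 * (illustrative; "overwrite" is one of the core trace options):
 *
 *   cat  /sys/kernel/tracing/options/overwrite      # prints "0" or "1"
 *   echo 1 > /sys/kernel/tracing/options/overwrite
 */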
9580
9581 struct dentry *trace_create_file(const char *name,
9582 umode_t mode,
9583 struct dentry *parent,
9584 void *data,
9585 const struct file_operations *fops)
9586 {
9587 struct dentry *ret;
9588
9589 ret = tracefs_create_file(name, mode, parent, data, fops);
9590 if (!ret)
9591 pr_warn("Could not create tracefs '%s' entry\n", name);
9592
9593 return ret;
9594 }
9595
9596
9597 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9598 {
9599 struct dentry *d_tracer;
9600
9601 if (tr->options)
9602 return tr->options;
9603
9604 d_tracer = tracing_get_dentry(tr);
9605 if (IS_ERR(d_tracer))
9606 return NULL;
9607
9608 tr->options = tracefs_create_dir("options", d_tracer);
9609 if (!tr->options) {
9610 pr_warn("Could not create tracefs directory 'options'\n");
9611 return NULL;
9612 }
9613
9614 return tr->options;
9615 }
9616
9617 static void
9618 create_trace_option_file(struct trace_array *tr,
9619 struct trace_option_dentry *topt,
9620 struct tracer_flags *flags,
9621 struct tracer_opt *opt)
9622 {
9623 struct dentry *t_options;
9624
9625 t_options = trace_options_init_dentry(tr);
9626 if (!t_options)
9627 return;
9628
9629 topt->flags = flags;
9630 topt->opt = opt;
9631 topt->tr = tr;
9632
9633 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9634 t_options, topt, &trace_options_fops);
9635 }
9636
9637 static int
9638 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
9639 struct tracer_flags *flags)
9640 {
9641 struct trace_option_dentry *topts;
9642 struct trace_options *tr_topts;
9643 struct tracer_opt *opts;
9644 int cnt;
9645
9646 if (!flags || !flags->opts)
9647 return 0;
9648
9649 opts = flags->opts;
9650
9651 for (cnt = 0; opts[cnt].name; cnt++)
9652 ;
9653
9654 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9655 if (!topts)
9656 return 0;
9657
9658 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9659 GFP_KERNEL);
9660 if (!tr_topts) {
9661 kfree(topts);
9662 return -ENOMEM;
9663 }
9664
9665 tr->topts = tr_topts;
9666 tr->topts[tr->nr_topts].tracer = tracer;
9667 tr->topts[tr->nr_topts].topts = topts;
9668 tr->nr_topts++;
9669
9670 for (cnt = 0; opts[cnt].name; cnt++) {
9671 create_trace_option_file(tr, &topts[cnt], flags,
9672 &opts[cnt]);
9673 MEM_FAIL(topts[cnt].entry == NULL,
9674 "Failed to create trace option: %s",
9675 opts[cnt].name);
9676 }
9677 return 0;
9678 }
9679
9680 static int get_global_flags_val(struct tracer *tracer)
9681 {
9682 struct tracers *t;
9683
9684 list_for_each_entry(t, &global_trace.tracers, list) {
9685 if (t->tracer != tracer)
9686 continue;
9687 if (!t->flags)
9688 return -1;
9689 return t->flags->val;
9690 }
9691 return -1;
9692 }
9693
9694 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9695 {
9696 struct tracer *tracer = t->tracer;
9697 struct tracer_flags *flags = t->flags ?: tracer->flags;
9698
9699 if (!flags)
9700 return 0;
9701
9702 /* Only add tracer options after update_tracer_options() finishes */
9703 if (!tracer_options_updated)
9704 return 0;
9705
9706 return create_trace_option_files(tr, tracer, flags);
9707 }
9708
9709 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9710 {
9711 struct tracer_flags *flags;
9712 struct tracers *t;
9713 int ret;
9714
9715 /* Only enable if the directory has been created already. */
9716 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9717 return 0;
9718
9719 /*
9720 * If this is an instance, only create flags for tracers
9721 * the instance may have.
9722 */
9723 if (!trace_ok_for_array(tracer, tr))
9724 return 0;
9725
9726 t = kmalloc(sizeof(*t), GFP_KERNEL);
9727 if (!t)
9728 return -ENOMEM;
9729
9730 t->tracer = tracer;
9731 t->flags = NULL;
9732 list_add(&t->list, &tr->tracers);
9733
9734 flags = tracer->flags;
9735 if (!flags) {
9736 if (!tracer->default_flags)
9737 return 0;
9738
9739 /*
9740 * If the tracer defines default flags, it means the flags are
9741 * per trace instance.
9742 */
9743 flags = kmalloc(sizeof(*flags), GFP_KERNEL);
9744 if (!flags)
9745 return -ENOMEM;
9746
9747 *flags = *tracer->default_flags;
9748 flags->trace = tracer;
9749
9750 t->flags = flags;
9751
9752 /* If this is an instance, inherit the global_trace flags */
9753 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9754 int val = get_global_flags_val(tracer);
9755 if (!WARN_ON_ONCE(val < 0))
9756 flags->val = val;
9757 }
9758 }
9759
9760 ret = add_tracer_options(tr, t);
9761 if (ret < 0) {
9762 list_del(&t->list);
9763 kfree(t->flags);
9764 kfree(t);
9765 }
9766
9767 return ret;
9768 }
9769
9770 static struct dentry *
9771 create_trace_option_core_file(struct trace_array *tr,
9772 const char *option, long index)
9773 {
9774 struct dentry *t_options;
9775
9776 t_options = trace_options_init_dentry(tr);
9777 if (!t_options)
9778 return NULL;
9779
9780 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9781 (void *)&tr->trace_flags_index[index],
9782 &trace_options_core_fops);
9783 }
9784
9785 static void create_trace_options_dir(struct trace_array *tr)
9786 {
9787 struct dentry *t_options;
9788 bool top_level = tr == &global_trace;
9789 int i;
9790
9791 t_options = trace_options_init_dentry(tr);
9792 if (!t_options)
9793 return;
9794
9795 for (i = 0; trace_options[i]; i++) {
9796 if (top_level ||
9797 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9798 create_trace_option_core_file(tr, trace_options[i], i);
9799 }
9800 }
9801 }
9802
9803 static ssize_t
9804 rb_simple_read(struct file *filp, char __user *ubuf,
9805 size_t cnt, loff_t *ppos)
9806 {
9807 struct trace_array *tr = filp->private_data;
9808 char buf[64];
9809 int r;
9810
9811 r = tracer_tracing_is_on(tr);
9812 r = sprintf(buf, "%d\n", r);
9813
9814 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9815 }
9816
9817 static ssize_t
9818 rb_simple_write(struct file *filp, const char __user *ubuf,
9819 size_t cnt, loff_t *ppos)
9820 {
9821 struct trace_array *tr = filp->private_data;
9822 struct trace_buffer *buffer = tr->array_buffer.buffer;
9823 unsigned long val;
9824 int ret;
9825
9826 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9827 if (ret)
9828 return ret;
9829
9830 if (buffer) {
9831 guard(mutex)(&trace_types_lock);
9832 if (!!val == tracer_tracing_is_on(tr)) {
9833 val = 0; /* do nothing */
9834 } else if (val) {
9835 tracer_tracing_on(tr);
9836 if (tr->current_trace->start)
9837 tr->current_trace->start(tr);
9838 } else {
9839 tracer_tracing_off(tr);
9840 if (tr->current_trace->stop)
9841 tr->current_trace->stop(tr);
9842 /* Wake up any waiters */
9843 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9844 }
9845 }
9846
9847 (*ppos)++;
9848
9849 return cnt;
9850 }
9851
9852 static const struct file_operations rb_simple_fops = {
9853 .open = tracing_open_generic_tr,
9854 .read = rb_simple_read,
9855 .write = rb_simple_write,
9856 .release = tracing_release_generic_tr,
9857 .llseek = default_llseek,
9858 };
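/*
 * rb_simple_fops backs the "tracing_on" file. Usage sketch (illustrative):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume writing
 *
 * Writing the value that is already set is a no-op (see rb_simple_write()).
 */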
9859
9860 static ssize_t
9861 buffer_percent_read(struct file *filp, char __user *ubuf,
9862 size_t cnt, loff_t *ppos)
9863 {
9864 struct trace_array *tr = filp->private_data;
9865 char buf[64];
9866 int r;
9867
9868 r = tr->buffer_percent;
9869 r = sprintf(buf, "%d\n", r);
9870
9871 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9872 }
9873
9874 static ssize_t
9875 buffer_percent_write(struct file *filp, const char __user *ubuf,
9876 size_t cnt, loff_t *ppos)
9877 {
9878 struct trace_array *tr = filp->private_data;
9879 unsigned long val;
9880 int ret;
9881
9882 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9883 if (ret)
9884 return ret;
9885
9886 if (val > 100)
9887 return -EINVAL;
9888
9889 tr->buffer_percent = val;
9890
9891 (*ppos)++;
9892
9893 return cnt;
9894 }
9895
9896 static const struct file_operations buffer_percent_fops = {
9897 .open = tracing_open_generic_tr,
9898 .read = buffer_percent_read,
9899 .write = buffer_percent_write,
9900 .release = tracing_release_generic_tr,
9901 .llseek = default_llseek,
9902 };
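/*
 * buffer_percent_fops backs the "buffer_percent" file, which sets how full
 * the ring buffer must be before blocked readers/pollers are woken. Usage
 * sketch (illustrative):
 *
 *   echo 50 > /sys/kernel/tracing/buffer_percent   # wake readers at 50% full
 *   echo 0  > /sys/kernel/tracing/buffer_percent   # wake on any new data
 */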
9903
9904 static ssize_t
9905 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9906 {
9907 struct trace_array *tr = filp->private_data;
9908 size_t size;
9909 char buf[64];
9910 int order;
9911 int r;
9912
9913 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9914 size = (PAGE_SIZE << order) / 1024;
9915
9916 r = sprintf(buf, "%zd\n", size);
9917
9918 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9919 }
9920
9921 static ssize_t
9922 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9923 size_t cnt, loff_t *ppos)
9924 {
9925 struct trace_array *tr = filp->private_data;
9926 unsigned long val;
9927 int old_order;
9928 int order;
9929 int pages;
9930 int ret;
9931
9932 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9933 if (ret)
9934 return ret;
9935
9936 val *= 1024; /* value passed in is in KB */
9937
9938 pages = DIV_ROUND_UP(val, PAGE_SIZE);
9939 order = fls(pages - 1);
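/*
 * Example (assuming 4K pages): writing 8 gives val = 8192, pages = 2,
 * order = 1, i.e. an 8K sub-buffer; writing 1 through 4 keeps the
 * single-page (order 0) sub-buffer.
 */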
9940
9941 /* limit between 1 and 128 system pages */
9942 if (order < 0 || order > 7)
9943 return -EINVAL;
9944
9945 /* Do not allow tracing while changing the order of the ring buffer */
9946 tracing_stop_tr(tr);
9947
9948 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9949 if (old_order == order)
9950 goto out;
9951
9952 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9953 if (ret)
9954 goto out;
9955
9956 #ifdef CONFIG_TRACER_MAX_TRACE
9957
9958 if (!tr->allocated_snapshot)
9959 goto out_max;
9960
9961 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9962 if (ret) {
9963 /* Put back the old order */
9964 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9965 if (WARN_ON_ONCE(cnt)) {
9966 /*
9967 * AARGH! We are left with different orders!
9968 * The max buffer is our "snapshot" buffer.
9969 * When a tracer needs a snapshot (one of the
9970 * latency tracers), it swaps the max buffer
9971 * with the saved snapshot. We succeeded in
9972 * updating the order of the main buffer, but failed
9973 * to update the order of the max buffer. Then, when
9974 * we tried to reset the main buffer to its original
9975 * order, we failed there too. This is very unlikely
9976 * to happen, but if it does, warn and kill all
9977 * tracing.
9978 */
9979 tracing_disabled = 1;
9980 }
9981 goto out;
9982 }
9983 out_max:
9984 #endif
9985 (*ppos)++;
9986 out:
9987 if (ret)
9988 cnt = ret;
9989 tracing_start_tr(tr);
9990 return cnt;
9991 }
9992
9993 static const struct file_operations buffer_subbuf_size_fops = {
9994 .open = tracing_open_generic_tr,
9995 .read = buffer_subbuf_size_read,
9996 .write = buffer_subbuf_size_write,
9997 .release = tracing_release_generic_tr,
9998 .llseek = default_llseek,
9999 };
10000
10001 static struct dentry *trace_instance_dir;
10002
10003 static void
10004 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
10005
10006 #ifdef CONFIG_MODULES
10007 static int make_mod_delta(struct module *mod, void *data)
10008 {
10009 struct trace_module_delta *module_delta;
10010 struct trace_scratch *tscratch;
10011 struct trace_mod_entry *entry;
10012 struct trace_array *tr = data;
10013 int i;
10014
10015 tscratch = tr->scratch;
10016 module_delta = READ_ONCE(tr->module_delta);
10017 for (i = 0; i < tscratch->nr_entries; i++) {
10018 entry = &tscratch->entries[i];
10019 if (strcmp(mod->name, entry->mod_name))
10020 continue;
10021 if (mod->state == MODULE_STATE_GOING)
10022 module_delta->delta[i] = 0;
10023 else
10024 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
10025 - entry->mod_addr;
10026 break;
10027 }
10028 return 0;
10029 }
10030 #else
10031 static int make_mod_delta(struct module *mod, void *data)
10032 {
10033 return 0;
10034 }
10035 #endif
10036
10037 static int mod_addr_comp(const void *a, const void *b, const void *data)
10038 {
10039 const struct trace_mod_entry *e1 = a;
10040 const struct trace_mod_entry *e2 = b;
10041
10042 return e1->mod_addr > e2->mod_addr ? 1 : -1;
10043 }
10044
10045 static void setup_trace_scratch(struct trace_array *tr,
10046 struct trace_scratch *tscratch, unsigned int size)
10047 {
10048 struct trace_module_delta *module_delta;
10049 struct trace_mod_entry *entry;
10050 int i, nr_entries;
10051
10052 if (!tscratch)
10053 return;
10054
10055 tr->scratch = tscratch;
10056 tr->scratch_size = size;
10057
10058 if (tscratch->text_addr)
10059 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
10060
10061 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
10062 goto reset;
10063
10064 /* Check if each module name is a valid string */
10065 for (i = 0; i < tscratch->nr_entries; i++) {
10066 int n;
10067
10068 entry = &tscratch->entries[i];
10069
10070 for (n = 0; n < MODULE_NAME_LEN; n++) {
10071 if (entry->mod_name[n] == '\0')
10072 break;
10073 if (!isprint(entry->mod_name[n]))
10074 goto reset;
10075 }
10076 if (n == MODULE_NAME_LEN)
10077 goto reset;
10078 }
10079
10080 /* Sort the entries so that we can find the appropriate module from an address. */
10081 nr_entries = tscratch->nr_entries;
10082 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
10083 mod_addr_comp, NULL, NULL);
10084
10085 if (IS_ENABLED(CONFIG_MODULES)) {
10086 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
10087 if (!module_delta) {
10088 pr_info("module_delta allocation failed. Not able to decode module address.");
10089 goto reset;
10090 }
10091 init_rcu_head(&module_delta->rcu);
10092 } else
10093 module_delta = NULL;
10094 WRITE_ONCE(tr->module_delta, module_delta);
10095
10096 /* Scan modules to make text delta for modules. */
10097 module_for_each_mod(make_mod_delta, tr);
10098
10099 /* Set trace_clock as the same of the previous boot. */
10100 if (tscratch->clock_id != tr->clock_id) {
10101 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
10102 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
10103 pr_info("the previous trace_clock info is not valid.");
10104 goto reset;
10105 }
10106 }
10107 return;
10108 reset:
10109 /* Invalid trace modules */
10110 memset(tscratch, 0, size);
10111 }
10112
10113 static int
10114 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
10115 {
10116 enum ring_buffer_flags rb_flags;
10117 struct trace_scratch *tscratch;
10118 unsigned int scratch_size = 0;
10119
10120 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
10121
10122 buf->tr = tr;
10123
10124 if (tr->range_addr_start && tr->range_addr_size) {
10125 /* Add scratch buffer to handle 128 modules */
10126 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
10127 tr->range_addr_start,
10128 tr->range_addr_size,
10129 struct_size(tscratch, entries, 128));
10130
10131 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
10132 setup_trace_scratch(tr, tscratch, scratch_size);
10133
10134 /*
10135 * This is basically the same as a mapped buffer,
10136 * with the same restrictions.
10137 */
10138 tr->mapped++;
10139 } else {
10140 buf->buffer = ring_buffer_alloc(size, rb_flags);
10141 }
10142 if (!buf->buffer)
10143 return -ENOMEM;
10144
10145 buf->data = alloc_percpu(struct trace_array_cpu);
10146 if (!buf->data) {
10147 ring_buffer_free(buf->buffer);
10148 buf->buffer = NULL;
10149 return -ENOMEM;
10150 }
10151
10152 /* Allocate the first page for all buffers */
10153 set_buffer_entries(&tr->array_buffer,
10154 ring_buffer_size(tr->array_buffer.buffer, 0));
10155
10156 return 0;
10157 }
10158
10159 static void free_trace_buffer(struct array_buffer *buf)
10160 {
10161 if (buf->buffer) {
10162 ring_buffer_free(buf->buffer);
10163 buf->buffer = NULL;
10164 free_percpu(buf->data);
10165 buf->data = NULL;
10166 }
10167 }
10168
10169 static int allocate_trace_buffers(struct trace_array *tr, int size)
10170 {
10171 int ret;
10172
10173 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
10174 if (ret)
10175 return ret;
10176
10177 #ifdef CONFIG_TRACER_MAX_TRACE
10178 /* Fixed memory-mapped buffer trace arrays do not have snapshot buffers */
10179 if (tr->range_addr_start)
10180 return 0;
10181
10182 ret = allocate_trace_buffer(tr, &tr->max_buffer,
10183 allocate_snapshot ? size : 1);
10184 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
10185 free_trace_buffer(&tr->array_buffer);
10186 return -ENOMEM;
10187 }
10188 tr->allocated_snapshot = allocate_snapshot;
10189
10190 allocate_snapshot = false;
10191 #endif
10192
10193 return 0;
10194 }
10195
10196 static void free_trace_buffers(struct trace_array *tr)
10197 {
10198 if (!tr)
10199 return;
10200
10201 free_trace_buffer(&tr->array_buffer);
10202 kfree(tr->module_delta);
10203
10204 #ifdef CONFIG_TRACER_MAX_TRACE
10205 free_trace_buffer(&tr->max_buffer);
10206 #endif
10207 }
10208
10209 static void init_trace_flags_index(struct trace_array *tr)
10210 {
10211 int i;
10212
10213 /* Used by the trace options files */
10214 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
10215 tr->trace_flags_index[i] = i;
10216 }
10217
10218 static int __update_tracer(struct trace_array *tr)
10219 {
10220 struct tracer *t;
10221 int ret = 0;
10222
10223 for (t = trace_types; t && !ret; t = t->next)
10224 ret = add_tracer(tr, t);
10225
10226 return ret;
10227 }
10228
10229 static __init int __update_tracer_options(struct trace_array *tr)
10230 {
10231 struct tracers *t;
10232 int ret = 0;
10233
10234 list_for_each_entry(t, &tr->tracers, list) {
10235 ret = add_tracer_options(tr, t);
10236 if (ret < 0)
10237 break;
10238 }
10239
10240 return ret;
10241 }
10242
10243 static __init void update_tracer_options(void)
10244 {
10245 struct trace_array *tr;
10246
10247 guard(mutex)(&trace_types_lock);
10248 tracer_options_updated = true;
10249 list_for_each_entry(tr, &ftrace_trace_arrays, list)
10250 __update_tracer_options(tr);
10251 }
10252
10253 /* Must have trace_types_lock held */
10254 struct trace_array *trace_array_find(const char *instance)
10255 {
10256 struct trace_array *tr, *found = NULL;
10257
10258 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10259 if (tr->name && strcmp(tr->name, instance) == 0) {
10260 found = tr;
10261 break;
10262 }
10263 }
10264
10265 return found;
10266 }
10267
10268 struct trace_array *trace_array_find_get(const char *instance)
10269 {
10270 struct trace_array *tr;
10271
10272 guard(mutex)(&trace_types_lock);
10273 tr = trace_array_find(instance);
10274 if (tr)
10275 tr->ref++;
10276
10277 return tr;
10278 }
10279
10280 static int trace_array_create_dir(struct trace_array *tr)
10281 {
10282 int ret;
10283
10284 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
10285 if (!tr->dir)
10286 return -EINVAL;
10287
10288 ret = event_trace_add_tracer(tr->dir, tr);
10289 if (ret) {
10290 tracefs_remove(tr->dir);
10291 return ret;
10292 }
10293
10294 init_tracer_tracefs(tr, tr->dir);
10295 ret = __update_tracer(tr);
10296 if (ret) {
10297 event_trace_del_tracer(tr);
10298 tracefs_remove(tr->dir);
10299 return ret;
10300 }
10301 return 0;
10302 }
10303
10304 static struct trace_array *
10305 trace_array_create_systems(const char *name, const char *systems,
10306 unsigned long range_addr_start,
10307 unsigned long range_addr_size)
10308 {
10309 struct trace_array *tr;
10310 int ret;
10311
10312 ret = -ENOMEM;
10313 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
10314 if (!tr)
10315 return ERR_PTR(ret);
10316
10317 tr->name = kstrdup(name, GFP_KERNEL);
10318 if (!tr->name)
10319 goto out_free_tr;
10320
10321 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10322 goto out_free_tr;
10323
10324 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10325 goto out_free_tr;
10326
10327 if (systems) {
10328 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10329 if (!tr->system_names)
10330 goto out_free_tr;
10331 }
10332
10333 /* Only for boot up memory mapped ring buffers */
10334 tr->range_addr_start = range_addr_start;
10335 tr->range_addr_size = range_addr_size;
10336
10337 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10338
10339 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10340
10341 raw_spin_lock_init(&tr->start_lock);
10342
10343 tr->syscall_buf_sz = global_trace.syscall_buf_sz;
10344
10345 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10346 #ifdef CONFIG_TRACER_MAX_TRACE
10347 spin_lock_init(&tr->snapshot_trigger_lock);
10348 #endif
10349 tr->current_trace = &nop_trace;
10350 tr->current_trace_flags = nop_trace.flags;
10351
10352 INIT_LIST_HEAD(&tr->systems);
10353 INIT_LIST_HEAD(&tr->events);
10354 INIT_LIST_HEAD(&tr->hist_vars);
10355 INIT_LIST_HEAD(&tr->err_log);
10356 INIT_LIST_HEAD(&tr->tracers);
10357 INIT_LIST_HEAD(&tr->marker_list);
10358
10359 #ifdef CONFIG_MODULES
10360 INIT_LIST_HEAD(&tr->mod_events);
10361 #endif
10362
10363 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10364 goto out_free_tr;
10365
10366 /* The ring buffer is expanded by default */
10367 trace_set_ring_buffer_expanded(tr);
10368
10369 if (ftrace_allocate_ftrace_ops(tr) < 0)
10370 goto out_free_tr;
10371
10372 ftrace_init_trace_array(tr);
10373
10374 init_trace_flags_index(tr);
10375
10376 if (trace_instance_dir) {
10377 ret = trace_array_create_dir(tr);
10378 if (ret)
10379 goto out_free_tr;
10380 } else
10381 __trace_early_add_events(tr);
10382
10383 list_add(&tr->list, &ftrace_trace_arrays);
10384
10385 tr->ref++;
10386
10387 return tr;
10388
10389 out_free_tr:
10390 ftrace_free_ftrace_ops(tr);
10391 free_trace_buffers(tr);
10392 free_cpumask_var(tr->pipe_cpumask);
10393 free_cpumask_var(tr->tracing_cpumask);
10394 kfree_const(tr->system_names);
10395 kfree(tr->range_name);
10396 kfree(tr->name);
10397 kfree(tr);
10398
10399 return ERR_PTR(ret);
10400 }
10401
10402 static struct trace_array *trace_array_create(const char *name)
10403 {
10404 return trace_array_create_systems(name, NULL, 0, 0);
10405 }
10406
10407 static int instance_mkdir(const char *name)
10408 {
10409 struct trace_array *tr;
10410 int ret;
10411
10412 guard(mutex)(&event_mutex);
10413 guard(mutex)(&trace_types_lock);
10414
10415 ret = -EEXIST;
10416 if (trace_array_find(name))
10417 return -EEXIST;
10418
10419 tr = trace_array_create(name);
10420
10421 ret = PTR_ERR_OR_ZERO(tr);
10422
10423 return ret;
10424 }
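/*
 * instance_mkdir() above and instance_rmdir() further below are wired to the
 * tracefs "instances" directory (see create_trace_instances()), e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 */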
10425
10426 #ifdef CONFIG_MMU
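/*
 * Map a physically contiguous range (e.g. a reserve_mem region used for a
 * persistent boot-mapped ring buffer) into vmalloc space. Returns the new
 * virtual start address, or 0 on failure.
 */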
10427 static u64 map_pages(unsigned long start, unsigned long size)
10428 {
10429 unsigned long vmap_start, vmap_end;
10430 struct vm_struct *area;
10431 int ret;
10432
10433 area = get_vm_area(size, VM_IOREMAP);
10434 if (!area)
10435 return 0;
10436
10437 vmap_start = (unsigned long) area->addr;
10438 vmap_end = vmap_start + size;
10439
10440 ret = vmap_page_range(vmap_start, vmap_end,
10441 start, pgprot_nx(PAGE_KERNEL));
10442 if (ret < 0) {
10443 free_vm_area(area);
10444 return 0;
10445 }
10446
10447 return (u64)vmap_start;
10448 }
10449 #else
10450 static inline u64 map_pages(unsigned long start, unsigned long size)
10451 {
10452 return 0;
10453 }
10454 #endif
10455
10456 /**
10457 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10458 * @name: The name of the trace array to be looked up/created.
10459 * @systems: A list of systems to create event directories for (NULL for all)
10460 *
10461 * Returns a pointer to the trace array with the given name, or
10462 * NULL if it cannot be created.
10463 *
10464 * NOTE: This function increments the reference counter associated with the
10465 * trace array returned. This makes sure it cannot be freed while in use.
10466 * Use trace_array_put() once the trace array is no longer needed.
10467 * If the trace_array is to be freed, trace_array_destroy() needs to
10468 * be called after the trace_array_put(), or simply let user space delete
10469 * it from the tracefs instances directory. But until the
10470 * trace_array_put() is called, user space can not delete it.
10471 *
10472 */
10473 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10474 {
10475 struct trace_array *tr;
10476
10477 guard(mutex)(&event_mutex);
10478 guard(mutex)(&trace_types_lock);
10479
10480 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10481 if (tr->name && strcmp(tr->name, name) == 0) {
10482 tr->ref++;
10483 return tr;
10484 }
10485 }
10486
10487 tr = trace_array_create_systems(name, systems, 0, 0);
10488
10489 if (IS_ERR(tr))
10490 tr = NULL;
10491 else
10492 tr->ref++;
10493
10494 return tr;
10495 }
10496 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
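/*
 * Minimal usage sketch for a module creating its own instance (illustrative;
 * error handling omitted):
 *
 *   struct trace_array *tr = trace_array_get_by_name("my-instance", NULL);
 *
 *   ... use tr (e.g. trace_array_printk(tr, ...)) ...
 *
 *   trace_array_put(tr);
 *   trace_array_destroy(tr);   (only if the instance should be removed)
 */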
10497
10498 static int __remove_instance(struct trace_array *tr)
10499 {
10500 int i;
10501
10502 /* Reference counter for a newly created trace array = 1. */
10503 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10504 return -EBUSY;
10505
10506 list_del(&tr->list);
10507
10508 /* Disable all the flags that were enabled coming in */
10509 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10510 if ((1 << i) & ZEROED_TRACE_FLAGS)
10511 set_tracer_flag(tr, 1ULL << i, 0);
10512 }
10513
10514 if (printk_trace == tr)
10515 update_printk_trace(&global_trace);
10516
10517 if (update_marker_trace(tr, 0))
10518 synchronize_rcu();
10519
10520 tracing_set_nop(tr);
10521 clear_ftrace_function_probes(tr);
10522 event_trace_del_tracer(tr);
10523 ftrace_clear_pids(tr);
10524 ftrace_destroy_function_files(tr);
10525 tracefs_remove(tr->dir);
10526 free_percpu(tr->last_func_repeats);
10527 free_trace_buffers(tr);
10528 clear_tracing_err_log(tr);
10529 free_tracers(tr);
10530
10531 if (tr->range_name) {
10532 reserve_mem_release_by_name(tr->range_name);
10533 kfree(tr->range_name);
10534 }
10535 if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
10536 vfree((void *)tr->range_addr_start);
10537
10538 for (i = 0; i < tr->nr_topts; i++) {
10539 kfree(tr->topts[i].topts);
10540 }
10541 kfree(tr->topts);
10542
10543 free_cpumask_var(tr->pipe_cpumask);
10544 free_cpumask_var(tr->tracing_cpumask);
10545 kfree_const(tr->system_names);
10546 kfree(tr->name);
10547 kfree(tr);
10548
10549 return 0;
10550 }
10551
10552 int trace_array_destroy(struct trace_array *this_tr)
10553 {
10554 struct trace_array *tr;
10555
10556 if (!this_tr)
10557 return -EINVAL;
10558
10559 guard(mutex)(&event_mutex);
10560 guard(mutex)(&trace_types_lock);
10561
10562
10563 /* Make sure the trace array exists before destroying it. */
10564 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10565 if (tr == this_tr)
10566 return __remove_instance(tr);
10567 }
10568
10569 return -ENODEV;
10570 }
10571 EXPORT_SYMBOL_GPL(trace_array_destroy);
10572
10573 static int instance_rmdir(const char *name)
10574 {
10575 struct trace_array *tr;
10576
10577 guard(mutex)(&event_mutex);
10578 guard(mutex)(&trace_types_lock);
10579
10580 tr = trace_array_find(name);
10581 if (!tr)
10582 return -ENODEV;
10583
10584 return __remove_instance(tr);
10585 }
10586
10587 static __init void create_trace_instances(struct dentry *d_tracer)
10588 {
10589 struct trace_array *tr;
10590
10591 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10592 instance_mkdir,
10593 instance_rmdir);
10594 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10595 return;
10596
10597 guard(mutex)(&event_mutex);
10598 guard(mutex)(&trace_types_lock);
10599
10600 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10601 if (!tr->name)
10602 continue;
10603 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10604 "Failed to create instance directory\n"))
10605 return;
10606 }
10607 }
10608
10609 static void
10610 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10611 {
10612 int cpu;
10613
10614 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10615 tr, &show_traces_fops);
10616
10617 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10618 tr, &set_tracer_fops);
10619
10620 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10621 tr, &tracing_cpumask_fops);
10622
10623 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10624 tr, &tracing_iter_fops);
10625
10626 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10627 tr, &tracing_fops);
10628
10629 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10630 tr, &tracing_pipe_fops);
10631
10632 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10633 tr, &tracing_entries_fops);
10634
10635 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10636 tr, &tracing_total_entries_fops);
10637
10638 trace_create_file("free_buffer", 0200, d_tracer,
10639 tr, &tracing_free_buffer_fops);
10640
10641 trace_create_file("trace_marker", 0220, d_tracer,
10642 tr, &tracing_mark_fops);
10643
10644 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10645
10646 trace_create_file("trace_marker_raw", 0220, d_tracer,
10647 tr, &tracing_mark_raw_fops);
10648
10649 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10650 &trace_clock_fops);
10651
10652 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10653 tr, &rb_simple_fops);
10654
10655 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10656 &trace_time_stamp_mode_fops);
10657
10658 tr->buffer_percent = 50;
10659
10660 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10661 tr, &buffer_percent_fops);
10662
10663 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10664 tr, &buffer_subbuf_size_fops);
10665
10666 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
10667 tr, &tracing_syscall_buf_fops);
10668
10669 create_trace_options_dir(tr);
10670
10671 #ifdef CONFIG_TRACER_MAX_TRACE
10672 trace_create_maxlat_file(tr, d_tracer);
10673 #endif
10674
10675 if (ftrace_create_function_files(tr, d_tracer))
10676 MEM_FAIL(1, "Could not allocate function filter files");
10677
10678 if (tr->range_addr_start) {
10679 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10680 tr, &last_boot_fops);
10681 #ifdef CONFIG_TRACER_SNAPSHOT
10682 } else {
10683 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10684 tr, &snapshot_fops);
10685 #endif
10686 }
10687
10688 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10689 tr, &tracing_err_log_fops);
10690
10691 for_each_tracing_cpu(cpu)
10692 tracing_init_tracefs_percpu(tr, cpu);
10693
10694 ftrace_init_tracefs(tr, d_tracer);
10695 }
10696
10697 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10698 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
10699 {
10700 struct vfsmount *mnt;
10701 struct file_system_type *type;
10702 struct fs_context *fc;
10703 int ret;
10704
10705 /*
10706 * To maintain backward compatibility for tools that mount
10707 * debugfs to get to the tracing facility, tracefs is automatically
10708 * mounted to the debugfs/tracing directory.
10709 */
10710 type = get_fs_type("tracefs");
10711 if (!type)
10712 return NULL;
10713
10714 fc = fs_context_for_submount(type, mntpt);
10715 put_filesystem(type);
10716 if (IS_ERR(fc))
10717 return ERR_CAST(fc);
10718
10719 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10720
10721 ret = vfs_parse_fs_string(fc, "source", "tracefs");
10722 if (!ret)
10723 mnt = fc_mount(fc);
10724 else
10725 mnt = ERR_PTR(ret);
10726
10727 put_fs_context(fc);
10728 return mnt;
10729 }
10730 #endif
10731
10732 /**
10733 * tracing_init_dentry - initialize top level trace array
10734 *
10735 * This is called when creating files or directories in the tracing
10736 * directory. It is called via fs_initcall() by any of the boot up code
10737 * and expects to return the dentry of the top level tracing directory.
10738 */
10739 int tracing_init_dentry(void)
10740 {
10741 struct trace_array *tr = &global_trace;
10742
10743 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10744 pr_warn("Tracing disabled due to lockdown\n");
10745 return -EPERM;
10746 }
10747
10748 /* The top level trace array uses NULL as parent */
10749 if (tr->dir)
10750 return 0;
10751
10752 if (WARN_ON(!tracefs_initialized()))
10753 return -ENODEV;
10754
10755 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10756 /*
10757 * As there may still be users that expect the tracing
10758 * files to exist in debugfs/tracing, we must automount
10759 * the tracefs file system there, so older tools still
10760 * work with the newer kernel.
10761 */
10762 tr->dir = debugfs_create_automount("tracing", NULL,
10763 trace_automount, NULL);
10764 #endif
10765
10766 return 0;
10767 }
10768
10769 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10770 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10771
10772 static struct workqueue_struct *eval_map_wq __initdata;
10773 static struct work_struct eval_map_work __initdata;
10774 static struct work_struct tracerfs_init_work __initdata;
10775
10776 static void __init eval_map_work_func(struct work_struct *work)
10777 {
10778 int len;
10779
10780 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10781 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10782 }
10783
10784 static int __init trace_eval_init(void)
10785 {
10786 INIT_WORK(&eval_map_work, eval_map_work_func);
10787
10788 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10789 if (!eval_map_wq) {
10790 pr_err("Unable to allocate eval_map_wq\n");
10791 /* Do work here */
10792 eval_map_work_func(&eval_map_work);
10793 return -ENOMEM;
10794 }
10795
10796 queue_work(eval_map_wq, &eval_map_work);
10797 return 0;
10798 }
10799
10800 subsys_initcall(trace_eval_init);
10801
10802 static int __init trace_eval_sync(void)
10803 {
10804 /* Make sure the eval map updates are finished */
10805 if (eval_map_wq)
10806 destroy_workqueue(eval_map_wq);
10807 return 0;
10808 }
10809
10810 late_initcall_sync(trace_eval_sync);
10811
10812
10813 #ifdef CONFIG_MODULES
10814
10815 bool module_exists(const char *module)
10816 {
10817 /* All modules have the symbol __this_module */
10818 static const char this_mod[] = "__this_module";
10819 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10820 unsigned long val;
10821 int n;
10822
10823 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10824
10825 if (n > sizeof(modname) - 1)
10826 return false;
10827
10828 val = module_kallsyms_lookup_name(modname);
10829 return val != 0;
10830 }
10831
10832 static void trace_module_add_evals(struct module *mod)
10833 {
10834 /*
10835 * Modules with bad taint do not have events created, do
10836 * not bother with enums either.
10837 */
10838 if (trace_module_has_bad_taint(mod))
10839 return;
10840
10841 /* Even if there are no trace_evals, this needs to sanitize field types. */
10842 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10843 }
10844
10845 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10846 static void trace_module_remove_evals(struct module *mod)
10847 {
10848 union trace_eval_map_item *map;
10849 union trace_eval_map_item **last = &trace_eval_maps;
10850
10851 if (!mod->num_trace_evals)
10852 return;
10853
10854 guard(mutex)(&trace_eval_mutex);
10855
10856 map = trace_eval_maps;
10857
10858 while (map) {
10859 if (map->head.mod == mod)
10860 break;
10861 map = trace_eval_jmp_to_tail(map);
10862 last = &map->tail.next;
10863 map = map->tail.next;
10864 }
10865 if (!map)
10866 return;
10867
10868 *last = trace_eval_jmp_to_tail(map)->tail.next;
10869 kfree(map);
10870 }
10871 #else
10872 static inline void trace_module_remove_evals(struct module *mod) { }
10873 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10874
10875 static void trace_module_record(struct module *mod, bool add)
10876 {
10877 struct trace_array *tr;
10878 unsigned long flags;
10879
10880 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10881 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10882 /* Update any persistent trace array that has already been started */
10883 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10884 guard(mutex)(&scratch_mutex);
10885 save_mod(mod, tr);
10886 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10887 /* Update delta if the module loaded in previous boot */
10888 make_mod_delta(mod, tr);
10889 }
10890 }
10891 }
10892
10893 static int trace_module_notify(struct notifier_block *self,
10894 unsigned long val, void *data)
10895 {
10896 struct module *mod = data;
10897
10898 switch (val) {
10899 case MODULE_STATE_COMING:
10900 trace_module_add_evals(mod);
10901 trace_module_record(mod, true);
10902 break;
10903 case MODULE_STATE_GOING:
10904 trace_module_remove_evals(mod);
10905 trace_module_record(mod, false);
10906 break;
10907 }
10908
10909 return NOTIFY_OK;
10910 }
10911
10912 static struct notifier_block trace_module_nb = {
10913 .notifier_call = trace_module_notify,
10914 .priority = 0,
10915 };
10916 #endif /* CONFIG_MODULES */
10917
10918 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10919 {
10920
10921 event_trace_init();
10922
10923 init_tracer_tracefs(&global_trace, NULL);
10924 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10925
10926 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10927 &global_trace, &tracing_thresh_fops);
10928
10929 trace_create_file("README", TRACE_MODE_READ, NULL,
10930 NULL, &tracing_readme_fops);
10931
10932 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10933 NULL, &tracing_saved_cmdlines_fops);
10934
10935 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10936 NULL, &tracing_saved_cmdlines_size_fops);
10937
10938 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10939 NULL, &tracing_saved_tgids_fops);
10940
10941 trace_create_eval_file(NULL);
10942
10943 #ifdef CONFIG_MODULES
10944 register_module_notifier(&trace_module_nb);
10945 #endif
10946
10947 #ifdef CONFIG_DYNAMIC_FTRACE
10948 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10949 NULL, &tracing_dyn_info_fops);
10950 #endif
10951
10952 create_trace_instances(NULL);
10953
10954 update_tracer_options();
10955 }
10956
10957 static __init int tracer_init_tracefs(void)
10958 {
10959 int ret;
10960
10961 trace_access_lock_init();
10962
10963 ret = tracing_init_dentry();
10964 if (ret)
10965 return 0;
10966
10967 if (eval_map_wq) {
10968 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10969 queue_work(eval_map_wq, &tracerfs_init_work);
10970 } else {
10971 tracer_init_tracefs_work_func(NULL);
10972 }
10973
10974 if (rv_init_interface())
10975 pr_err("RV: Error while creating the RV interface\n");
10976
10977 return 0;
10978 }
10979
10980 fs_initcall(tracer_init_tracefs);
10981
10982 static int trace_die_panic_handler(struct notifier_block *self,
10983 unsigned long ev, void *unused);
10984
10985 static struct notifier_block trace_panic_notifier = {
10986 .notifier_call = trace_die_panic_handler,
10987 .priority = INT_MAX - 1,
10988 };
10989
10990 static struct notifier_block trace_die_notifier = {
10991 .notifier_call = trace_die_panic_handler,
10992 .priority = INT_MAX - 1,
10993 };
10994
10995 /*
10996 * The idea is to execute the following die/panic callback early, in order
10997 * to avoid showing irrelevant information in the trace (like other panic
10998 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10999 * warnings get disabled (to prevent potential log flooding).
11000 */
11001 static int trace_die_panic_handler(struct notifier_block *self,
11002 unsigned long ev, void *unused)
11003 {
11004 if (!ftrace_dump_on_oops_enabled())
11005 return NOTIFY_DONE;
11006
11007 /* The die notifier requires DIE_OOPS to trigger */
11008 if (self == &trace_die_notifier && ev != DIE_OOPS)
11009 return NOTIFY_DONE;
11010
11011 ftrace_dump(DUMP_PARAM);
11012
11013 return NOTIFY_DONE;
11014 }
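
/*
 * For illustration (the parameter value here is just an example): booting
 * with "ftrace_dump_on_oops=1" and later hitting a panic or a DIE_OOPS
 * die event lands in this handler, which calls ftrace_dump(DUMP_PARAM);
 * the parameter string itself is parsed by ftrace_dump_by_param() below.
 * Both notifier blocks above are registered from tracer_alloc_buffers().
 */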
11015
11016 /*
11017 * printk is set to a max of 1024; we really don't need it that big.
11018 * Nothing should be printing 1000 characters anyway.
11019 */
11020 #define TRACE_MAX_PRINT 1000
11021
11022 /*
11023 * Define here KERN_TRACE so that we have one place to modify
11024 * it if we decide to change what log level the ftrace dump
11025 * should be at.
11026 */
11027 #define KERN_TRACE KERN_EMERG
11028
11029 void
11030 trace_printk_seq(struct trace_seq *s)
11031 {
11032 /* Probably should print a warning here. */
11033 if (s->seq.len >= TRACE_MAX_PRINT)
11034 s->seq.len = TRACE_MAX_PRINT;
11035
11036 /*
11037 * More paranoid code. Although the buffer size is set to
11038 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
11039 * an extra layer of protection.
11040 */
11041 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
11042 s->seq.len = s->seq.size - 1;
11043
11044 /* should be zero terminated, but we are paranoid. */
11045 s->buffer[s->seq.len] = 0;
11046
11047 printk(KERN_TRACE "%s", s->buffer);
11048
11049 trace_seq_init(s);
11050 }
11051
11052 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
11053 {
11054 iter->tr = tr;
11055 iter->trace = iter->tr->current_trace;
11056 iter->cpu_file = RING_BUFFER_ALL_CPUS;
11057 iter->array_buffer = &tr->array_buffer;
11058
11059 if (iter->trace && iter->trace->open)
11060 iter->trace->open(iter);
11061
11062 /* Annotate start of buffers if we had overruns */
11063 if (ring_buffer_overruns(iter->array_buffer->buffer))
11064 iter->iter_flags |= TRACE_FILE_ANNOTATE;
11065
11066 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
11067 if (trace_clocks[iter->tr->clock_id].in_ns)
11068 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11069
11070 /* Can not use kmalloc for iter.temp and iter.fmt */
11071 iter->temp = static_temp_buf;
11072 iter->temp_size = STATIC_TEMP_BUF_SIZE;
11073 iter->fmt = static_fmt_buf;
11074 iter->fmt_size = STATIC_FMT_BUF_SIZE;
11075 }
11076
11077 void trace_init_global_iter(struct trace_iterator *iter)
11078 {
11079 trace_init_iter(iter, &global_trace);
11080 }
11081
11082 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11083 {
11084 /* use static because iter can be a bit big for the stack */
11085 static struct trace_iterator iter;
11086 unsigned int old_userobj;
11087 unsigned long flags;
11088 int cnt = 0;
11089
11090 /*
11091 * Always turn off tracing when we dump.
11092 * We don't need to show trace output of what happens
11093 * between multiple crashes.
11094 *
11095 * If the user does a sysrq-z, then they can re-enable
11096 * tracing with echo 1 > tracing_on.
11097 */
11098 tracer_tracing_off(tr);
11099
11100 local_irq_save(flags);
11101
11102 /* Simulate the iterator */
11103 trace_init_iter(&iter, tr);
11104
11105 /* While dumping, do not allow the buffer to be enabled */
11106 tracer_tracing_disable(tr);
11107
11108 old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11109
11110 /* don't look at user memory in panic mode */
11111 tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11112
11113 if (dump_mode == DUMP_ORIG)
11114 iter.cpu_file = raw_smp_processor_id();
11115 else
11116 iter.cpu_file = RING_BUFFER_ALL_CPUS;
11117
11118 if (tr == &global_trace)
11119 printk(KERN_TRACE "Dumping ftrace buffer:\n");
11120 else
11121 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11122
11123 /* Did function tracer already get disabled? */
11124 if (ftrace_is_dead()) {
11125 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11126 printk("# MAY BE MISSING FUNCTION EVENTS\n");
11127 }
11128
11129 /*
11130 * We need to stop all tracing on all CPUs to read
11131 * the next buffer. This is a bit expensive, but is
11132 * not done often. We fill all that we can read,
11133 * and then release the locks again.
11134 */
11135
11136 while (!trace_empty(&iter)) {
11137
11138 if (!cnt)
11139 printk(KERN_TRACE "---------------------------------\n");
11140
11141 cnt++;
11142
11143 trace_iterator_reset(&iter);
11144 iter.iter_flags |= TRACE_FILE_LAT_FMT;
11145
11146 if (trace_find_next_entry_inc(&iter) != NULL) {
11147 int ret;
11148
11149 ret = print_trace_line(&iter);
11150 if (ret != TRACE_TYPE_NO_CONSUME)
11151 trace_consume(&iter);
11152
11153 trace_printk_seq(&iter.seq);
11154 }
11155 touch_nmi_watchdog();
11156 }
11157
11158 if (!cnt)
11159 printk(KERN_TRACE " (ftrace buffer empty)\n");
11160 else
11161 printk(KERN_TRACE "---------------------------------\n");
11162
11163 tr->trace_flags |= old_userobj;
11164
11165 tracer_tracing_enable(tr);
11166 local_irq_restore(flags);
11167 }
11168
11169 static void ftrace_dump_by_param(void)
11170 {
11171 bool first_param = true;
11172 char dump_param[MAX_TRACER_SIZE];
11173 char *buf, *token, *inst_name;
11174 struct trace_array *tr;
11175
11176 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
11177 buf = dump_param;
11178
11179 while ((token = strsep(&buf, ",")) != NULL) {
11180 if (first_param) {
11181 first_param = false;
11182 if (!strcmp("0", token))
11183 continue;
11184 else if (!strcmp("1", token)) {
11185 ftrace_dump_one(&global_trace, DUMP_ALL);
11186 continue;
11187 }
11188 else if (!strcmp("2", token) ||
11189 !strcmp("orig_cpu", token)) {
11190 ftrace_dump_one(&global_trace, DUMP_ORIG);
11191 continue;
11192 }
11193 }
11194
11195 inst_name = strsep(&token, "=");
11196 tr = trace_array_find(inst_name);
11197 if (!tr) {
11198 printk(KERN_TRACE "Instance %s not found\n", inst_name);
11199 continue;
11200 }
11201
11202 if (token && (!strcmp("2", token) ||
11203 !strcmp("orig_cpu", token)))
11204 ftrace_dump_one(tr, DUMP_ORIG);
11205 else
11206 ftrace_dump_one(tr, DUMP_ALL);
11207 }
11208 }
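
/*
 * Worked example of the parsing above (the instance names "foo" and "bar"
 * are hypothetical):
 *
 *	ftrace_dump_on_oops="0,foo=orig_cpu,bar"
 *
 * The leading "0" skips the global buffer, "foo=orig_cpu" dumps only the
 * originating CPU of instance foo, and "bar" with no mode dumps all CPUs
 * of instance bar. A leading "1" or "2"/"orig_cpu" would instead dump the
 * global buffer on all CPUs or on the originating CPU respectively.
 */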
11209
11210 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
11211 {
11212 static atomic_t dump_running;
11213
11214 /* Only allow one dump user at a time. */
11215 if (atomic_inc_return(&dump_running) != 1) {
11216 atomic_dec(&dump_running);
11217 return;
11218 }
11219
11220 switch (oops_dump_mode) {
11221 case DUMP_ALL:
11222 ftrace_dump_one(&global_trace, DUMP_ALL);
11223 break;
11224 case DUMP_ORIG:
11225 ftrace_dump_one(&global_trace, DUMP_ORIG);
11226 break;
11227 case DUMP_PARAM:
11228 ftrace_dump_by_param();
11229 break;
11230 case DUMP_NONE:
11231 break;
11232 default:
11233 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
11234 ftrace_dump_one(&global_trace, DUMP_ALL);
11235 }
11236
11237 atomic_dec(&dump_running);
11238 }
11239 EXPORT_SYMBOL_GPL(ftrace_dump);
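
/*
 * Minimal usage sketch for the exported ftrace_dump() (the surrounding
 * driver condition is hypothetical, not part of this file):
 *
 *	if (WARN_ON(device_state_corrupted))
 *		ftrace_dump(DUMP_ALL);	// dump all CPUs of the global buffer
 *
 * Concurrent dumpers are serialized by dump_running above: a second
 * caller that races with an in-progress dump simply returns.
 */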
11240
11241 #define WRITE_BUFSIZE 4096
11242
11243 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
11244 size_t count, loff_t *ppos,
11245 int (*createfn)(const char *))
11246 {
11247 char *kbuf __free(kfree) = NULL;
11248 char *buf, *tmp;
11249 int ret = 0;
11250 size_t done = 0;
11251 size_t size;
11252
11253 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
11254 if (!kbuf)
11255 return -ENOMEM;
11256
11257 while (done < count) {
11258 size = count - done;
11259
11260 if (size >= WRITE_BUFSIZE)
11261 size = WRITE_BUFSIZE - 1;
11262
11263 if (copy_from_user(kbuf, buffer + done, size))
11264 return -EFAULT;
11265
11266 kbuf[size] = '\0';
11267 buf = kbuf;
11268 do {
11269 tmp = strchr(buf, '\n');
11270 if (tmp) {
11271 *tmp = '\0';
11272 size = tmp - buf + 1;
11273 } else {
11274 size = strlen(buf);
11275 if (done + size < count) {
11276 if (buf != kbuf)
11277 break;
11278 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
11279 pr_warn("Line length is too long: Should be less than %d\n",
11280 WRITE_BUFSIZE - 2);
11281 return -EINVAL;
11282 }
11283 }
11284 done += size;
11285
11286 /* Remove comments */
11287 tmp = strchr(buf, '#');
11288
11289 if (tmp)
11290 *tmp = '\0';
11291
11292 ret = createfn(buf);
11293 if (ret)
11294 return ret;
11295 buf += size;
11296
11297 } while (done < count);
11298 }
11299 return done;
11300 }
11301
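/*
 * Sketch of a typical caller (the identifiers here are illustrative): a
 * tracefs write handler feeds each newline-terminated, '#'-comment-stripped
 * command line to a createfn:
 *
 *	static int create_my_cmd(const char *raw_command)
 *	{
 *		pr_info("got command: %s\n", raw_command);
 *		return 0;	// a non-zero return aborts the whole write
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       create_my_cmd);
 *	}
 */
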
11302 #ifdef CONFIG_TRACER_MAX_TRACE
11303 __init static bool tr_needs_alloc_snapshot(const char *name)
11304 {
11305 char *test;
11306 int len = strlen(name);
11307 bool ret;
11308
11309 if (!boot_snapshot_index)
11310 return false;
11311
11312 if (strncmp(name, boot_snapshot_info, len) == 0 &&
11313 boot_snapshot_info[len] == '\t')
11314 return true;
11315
11316 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
11317 if (!test)
11318 return false;
11319
11320 sprintf(test, "\t%s\t", name);
11321 ret = strstr(boot_snapshot_info, test) != NULL;
11322 kfree(test);
11323 return ret;
11324 }
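
/*
 * Illustrative layout of boot_snapshot_info assumed by the matching above
 * (the instance names are hypothetical): requested names are stored back
 * to back, each followed by a tab, e.g. "foo\tbar\t". "foo" is caught by
 * the strncmp() fast path at the start of the string, while "bar" is
 * found through the "\tbar\t" substring search.
 */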
11325
11326 __init static void do_allocate_snapshot(const char *name)
11327 {
11328 if (!tr_needs_alloc_snapshot(name))
11329 return;
11330
11331 /*
11332 * When allocate_snapshot is set, the next call to
11333 * allocate_trace_buffers() (called by trace_array_get_by_name())
11334 * will allocate the snapshot buffer. That will also clear
11335 * this flag.
11336 */
11337 allocate_snapshot = true;
11338 }
11339 #else
11340 static inline void do_allocate_snapshot(const char *name) { }
11341 #endif
11342
11343 __init static int backup_instance_area(const char *backup,
11344 unsigned long *addr, phys_addr_t *size)
11345 {
11346 struct trace_array *backup_tr;
11347 void *allocated_vaddr = NULL;
11348
11349 backup_tr = trace_array_get_by_name(backup, NULL);
11350 if (!backup_tr) {
11351 pr_warn("Tracing: Instance %s is not found.\n", backup);
11352 return -ENOENT;
11353 }
11354
11355 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
11356 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
11357 trace_array_put(backup_tr);
11358 return -EINVAL;
11359 }
11360
11361 *size = backup_tr->range_addr_size;
11362
11363 allocated_vaddr = vzalloc(*size);
11364 if (!allocated_vaddr) {
11365 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
11366 backup, (unsigned long)*size);
11367 trace_array_put(backup_tr);
11368 return -ENOMEM;
11369 }
11370
11371 memcpy(allocated_vaddr,
11372 (void *)backup_tr->range_addr_start, (size_t)*size);
11373 *addr = (unsigned long)allocated_vaddr;
11374
11375 trace_array_put(backup_tr);
11376 return 0;
11377 }
11378
11379 __init static void enable_instances(void)
11380 {
11381 struct trace_array *tr;
11382 bool memmap_area = false;
11383 char *curr_str;
11384 char *name;
11385 char *str;
11386 char *tok;
11387
11388 /* A tab is always appended */
11389 boot_instance_info[boot_instance_index - 1] = '\0';
11390 str = boot_instance_info;
11391
11392 while ((curr_str = strsep(&str, "\t"))) {
11393 phys_addr_t start = 0;
11394 phys_addr_t size = 0;
11395 unsigned long addr = 0;
11396 bool traceprintk = false;
11397 bool traceoff = false;
11398 char *flag_delim;
11399 char *addr_delim;
11400 char *rname __free(kfree) = NULL;
11401 char *backup;
11402
11403 tok = strsep(&curr_str, ",");
11404
11405 name = strsep(&tok, "=");
11406 backup = tok;
11407
11408 flag_delim = strchr(name, '^');
11409 addr_delim = strchr(name, '@');
11410
11411 if (addr_delim)
11412 *addr_delim++ = '\0';
11413
11414 if (flag_delim)
11415 *flag_delim++ = '\0';
11416
11417 if (backup) {
11418 if (backup_instance_area(backup, &addr, &size) < 0)
11419 continue;
11420 }
11421
11422 if (flag_delim) {
11423 char *flag;
11424
11425 while ((flag = strsep(&flag_delim, "^"))) {
11426 if (strcmp(flag, "traceoff") == 0) {
11427 traceoff = true;
11428 } else if ((strcmp(flag, "printk") == 0) ||
11429 (strcmp(flag, "traceprintk") == 0) ||
11430 (strcmp(flag, "trace_printk") == 0)) {
11431 traceprintk = true;
11432 } else {
11433 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11434 flag, name);
11435 }
11436 }
11437 }
11438
11439 tok = addr_delim;
11440 if (tok && isdigit(*tok)) {
11441 start = memparse(tok, &tok);
11442 if (!start) {
11443 pr_warn("Tracing: Invalid boot instance address for %s\n",
11444 name);
11445 continue;
11446 }
11447 if (*tok != ':') {
11448 pr_warn("Tracing: No size specified for instance %s\n", name);
11449 continue;
11450 }
11451 tok++;
11452 size = memparse(tok, &tok);
11453 if (!size) {
11454 pr_warn("Tracing: Invalid boot instance size for %s\n",
11455 name);
11456 continue;
11457 }
11458 memmap_area = true;
11459 } else if (tok) {
11460 if (!reserve_mem_find_by_name(tok, &start, &size)) {
11461 start = 0;
11462 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11463 continue;
11464 }
11465 rname = kstrdup(tok, GFP_KERNEL);
11466 }
11467
11468 if (start) {
11469 /* Start and size must be page aligned */
11470 if (start & ~PAGE_MASK) {
11471 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11472 continue;
11473 }
11474 if (size & ~PAGE_MASK) {
11475 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11476 continue;
11477 }
11478
11479 if (memmap_area)
11480 addr = map_pages(start, size);
11481 else
11482 addr = (unsigned long)phys_to_virt(start);
11483 if (addr) {
11484 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11485 name, &start, (unsigned long)size);
11486 } else {
11487 pr_warn("Tracing: Failed to map boot instance %s\n", name);
11488 continue;
11489 }
11490 } else {
11491 /* Only non-mapped buffers have snapshot buffers */
11492 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11493 do_allocate_snapshot(name);
11494 }
11495
11496 tr = trace_array_create_systems(name, NULL, addr, size);
11497 if (IS_ERR(tr)) {
11498 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11499 continue;
11500 }
11501
11502 if (traceoff)
11503 tracer_tracing_off(tr);
11504
11505 if (traceprintk)
11506 update_printk_trace(tr);
11507
11508 /*
11509 * memmap'd buffers can not be freed.
11510 */
11511 if (memmap_area) {
11512 tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11513 tr->ref++;
11514 }
11515
11516 /*
11517 * Backup buffers can be freed but need vfree().
11518 */
11519 if (backup)
11520 tr->flags |= TRACE_ARRAY_FL_VMALLOC;
11521
11522 if (start || backup) {
11523 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11524 tr->range_name = no_free_ptr(rname);
11525 }
11526
11527 while ((tok = strsep(&curr_str, ","))) {
11528 early_enable_events(tr, tok, true);
11529 }
11530 }
11531 }
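
/*
 * Worked example of the boot instance string handled above, typically set
 * via the trace_instance= kernel command line option (the name, address
 * and event are hypothetical):
 *
 *	trace_instance=foo^traceoff@0x1000000:2M,sched:sched_switch
 *
 * creates instance "foo" with tracing initially off, maps it to 2MB of
 * physical memory at 0x1000000, and enables the sched_switch event in it.
 * A reserve_mem region name may be used instead of a raw address (e.g.
 * "foo@my_region"), and "foo=bar" creates "foo" from a vmalloc()'d copy
 * of the boot-mapped instance "bar" via backup_instance_area().
 */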
11532
11533 __init static int tracer_alloc_buffers(void)
11534 {
11535 int ring_buf_size;
11536 int ret = -ENOMEM;
11537
11538
11539 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11540 pr_warn("Tracing disabled due to lockdown\n");
11541 return -EPERM;
11542 }
11543
11544 /*
11545 * Make sure we don't accidentally add more trace options
11546 * than we have bits for.
11547 */
11548 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11549
11550 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11551 return -ENOMEM;
11552
11553 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11554 goto out_free_buffer_mask;
11555
11556 /* Only allocate trace_printk buffers if a trace_printk exists */
11557 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11558 /* Must be called before global_trace.buffer is allocated */
11559 trace_printk_init_buffers();
11560
11561 /* To save memory, keep the ring buffer size to its minimum */
11562 if (global_trace.ring_buffer_expanded)
11563 ring_buf_size = trace_buf_size;
11564 else
11565 ring_buf_size = 1;
11566
11567 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11568 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11569
11570 raw_spin_lock_init(&global_trace.start_lock);
11571
11572 /*
11573 * The prepare callback allocates some memory for the ring buffer. We
11574 * don't free the buffer if the CPU goes down. If we were to free
11575 * the buffer, then the user would lose any trace that was in the
11576 * buffer. The memory will be removed once the "instance" is removed.
11577 */
11578 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11579 "trace/RB:prepare", trace_rb_cpu_prepare,
11580 NULL);
11581 if (ret < 0)
11582 goto out_free_cpumask;
11583 /* Used for event triggers */
11584 ret = -ENOMEM;
11585 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11586 if (!temp_buffer)
11587 goto out_rm_hp_state;
11588
11589 if (trace_create_savedcmd() < 0)
11590 goto out_free_temp_buffer;
11591
11592 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11593 goto out_free_savedcmd;
11594
11595 /* TODO: make the number of buffers hot pluggable with CPUs */
11596 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11597 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11598 goto out_free_pipe_cpumask;
11599 }
11600 if (global_trace.buffer_disabled)
11601 tracing_off();
11602
11603 if (trace_boot_clock) {
11604 ret = tracing_set_clock(&global_trace, trace_boot_clock);
11605 if (ret < 0)
11606 pr_warn("Trace clock %s not defined, going back to default\n",
11607 trace_boot_clock);
11608 }
11609
11610 /*
11611 * register_tracer() might reference current_trace, so it
11612 * needs to be set before we register anything. This is
11613 * just a bootstrap of current_trace anyway.
11614 */
11615 global_trace.current_trace = &nop_trace;
11616 global_trace.current_trace_flags = nop_trace.flags;
11617
11618 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11619 #ifdef CONFIG_TRACER_MAX_TRACE
11620 spin_lock_init(&global_trace.snapshot_trigger_lock);
11621 #endif
11622 ftrace_init_global_array_ops(&global_trace);
11623
11624 #ifdef CONFIG_MODULES
11625 INIT_LIST_HEAD(&global_trace.mod_events);
11626 #endif
11627
11628 init_trace_flags_index(&global_trace);
11629
11630 INIT_LIST_HEAD(&global_trace.tracers);
11631
11632 /* All seems OK, enable tracing */
11633 tracing_disabled = 0;
11634
11635 atomic_notifier_chain_register(&panic_notifier_list,
11636 &trace_panic_notifier);
11637
11638 register_die_notifier(&trace_die_notifier);
11639
11640 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11641
11642 global_trace.syscall_buf_sz = syscall_buf_size;
11643
11644 INIT_LIST_HEAD(&global_trace.systems);
11645 INIT_LIST_HEAD(&global_trace.events);
11646 INIT_LIST_HEAD(&global_trace.hist_vars);
11647 INIT_LIST_HEAD(&global_trace.err_log);
11648 list_add(&global_trace.marker_list, &marker_copies);
11649 list_add(&global_trace.list, &ftrace_trace_arrays);
11650
11651 register_tracer(&nop_trace);
11652
11653 /* Function tracing may start here (via kernel command line) */
11654 init_function_trace();
11655
11656 apply_trace_boot_options();
11657
11658 register_snapshot_cmd();
11659
11660 return 0;
11661
11662 out_free_pipe_cpumask:
11663 free_cpumask_var(global_trace.pipe_cpumask);
11664 out_free_savedcmd:
11665 trace_free_saved_cmdlines_buffer();
11666 out_free_temp_buffer:
11667 ring_buffer_free(temp_buffer);
11668 out_rm_hp_state:
11669 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11670 out_free_cpumask:
11671 free_cpumask_var(global_trace.tracing_cpumask);
11672 out_free_buffer_mask:
11673 free_cpumask_var(tracing_buffer_mask);
11674 return ret;
11675 }
11676
11677 #ifdef CONFIG_FUNCTION_TRACER
11678 /* Used to set module cached ftrace filtering at boot up */
11679 struct trace_array *trace_get_global_array(void)
11680 {
11681 return &global_trace;
11682 }
11683 #endif
11684
11685 void __init ftrace_boot_snapshot(void)
11686 {
11687 #ifdef CONFIG_TRACER_MAX_TRACE
11688 struct trace_array *tr;
11689
11690 if (!snapshot_at_boot)
11691 return;
11692
11693 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11694 if (!tr->allocated_snapshot)
11695 continue;
11696
11697 tracing_snapshot_instance(tr);
11698 trace_array_puts(tr, "** Boot snapshot taken **\n");
11699 }
11700 #endif
11701 }
11702
11703 void __init early_trace_init(void)
11704 {
11705 if (tracepoint_printk) {
11706 tracepoint_print_iter =
11707 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11708 if (MEM_FAIL(!tracepoint_print_iter,
11709 "Failed to allocate trace iterator\n"))
11710 tracepoint_printk = 0;
11711 else
11712 static_key_enable(&tracepoint_printk_key.key);
11713 }
11714 tracer_alloc_buffers();
11715
11716 init_events();
11717 }
11718
11719 void __init trace_init(void)
11720 {
11721 trace_event_init();
11722
11723 if (boot_instance_index)
11724 enable_instances();
11725 }
11726
11727 __init static void clear_boot_tracer(void)
11728 {
11729 /*
11730 * The default bootup tracer name is stored in an init section.
11731 * This function is called from a late initcall. If we did not
11732 * find the boot tracer, then clear it out, to prevent
11733 * later registration from accessing the buffer that is
11734 * about to be freed.
11735 */
11736 if (!default_bootup_tracer)
11737 return;
11738
11739 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11740 default_bootup_tracer);
11741 default_bootup_tracer = NULL;
11742 }
11743
11744 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11745 __init static void tracing_set_default_clock(void)
11746 {
11747 /* sched_clock_stable() is determined in late_initcall */
11748 if (!trace_boot_clock && !sched_clock_stable()) {
11749 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11750 pr_warn("Can not set tracing clock due to lockdown\n");
11751 return;
11752 }
11753
11754 printk(KERN_WARNING
11755 "Unstable clock detected, switching default tracing clock to \"global\"\n"
11756 "If you want to keep using the local clock, then add:\n"
11757 " \"trace_clock=local\"\n"
11758 "on the kernel command line\n");
11759 tracing_set_clock(&global_trace, "global");
11760 }
11761 }
11762 #else
11763 static inline void tracing_set_default_clock(void) { }
11764 #endif
11765
11766 __init static int late_trace_init(void)
11767 {
11768 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11769 static_key_disable(&tracepoint_printk_key.key);
11770 tracepoint_printk = 0;
11771 }
11772
11773 if (traceoff_after_boot)
11774 tracing_off();
11775
11776 tracing_set_default_clock();
11777 clear_boot_tracer();
11778 return 0;
11779 }
11780
11781 late_initcall_sync(late_trace_init);
11782