1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <linux/sort.h>
54 #include <linux/io.h> /* vmap_page_range() */
55 #include <linux/fs_context.h>
56
57 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
58
59 #include "trace.h"
60 #include "trace_output.h"
61
62 #ifdef CONFIG_FTRACE_STARTUP_TEST
63 /*
64 * We need to change this state when a selftest is running.
65 * A selftest will look into the ring buffer to count the
66 * entries inserted during the selftest, although concurrent
67 * insertions into the ring buffer, such as trace_printk(), could occur
68 * at the same time, giving false positive or negative results.
69 */
70 static bool __read_mostly tracing_selftest_running;
71
72 /*
73 * If boot-time tracing including tracers/events via kernel cmdline
74 * is running, we do not want to run SELFTEST.
75 */
76 bool __read_mostly tracing_selftest_disabled;
77
78 void __init disable_tracing_selftest(const char *reason)
79 {
80 if (!tracing_selftest_disabled) {
81 tracing_selftest_disabled = true;
82 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83 }
84 }
85 #else
86 #define tracing_selftest_running 0
87 #define tracing_selftest_disabled 0
88 #endif
89
90 /* Pipe tracepoints to printk */
91 static struct trace_iterator *tracepoint_print_iter;
92 int tracepoint_printk;
93 static bool tracepoint_printk_stop_on_boot __initdata;
94 static bool traceoff_after_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* Store tracers and their flags per instance */
98 struct tracers {
99 struct list_head list;
100 struct tracer *tracer;
101 struct tracer_flags *flags;
102 };
103
104 /*
105 * To prevent the comm cache from being overwritten when no
106 * tracing is active, only save the comm when a trace event
107 * occurred.
108 */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112 * Kill all tracing for good (never come back).
113 * It is initialized to 1 but will turn to zero if the initialization
114 * of the tracer is successful. But that is the only place that sets
115 * this back to zero.
116 */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly tracing_buffer_mask;
120
121 #define MAX_TRACER_SIZE 100
122 /*
123 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124 *
125 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126 * is set, then ftrace_dump is called. This will output the contents
127 * of the ftrace buffers to the console. This is very useful for
128 * capturing traces that lead to crashes and outputting them to a
129 * serial console.
130 *
131 * It is off by default, but you can enable it either by specifying
132 * "ftrace_dump_on_oops" on the kernel command line, or by setting
133 * /proc/sys/kernel/ftrace_dump_on_oops
134 * Set 1 if you want to dump buffers of all CPUs
135 * Set 2 if you want to dump the buffer of the CPU that triggered oops
136 * Set instance name if you want to dump the specific trace instance
137 * Multiple instance dump is also supported, and instances are separated
138 * by commas.
139 */
140 /* Set to the string "0" by default to disable dumping */
141 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 static int __disable_trace_on_warning;
145
146 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
147 void *buffer, size_t *lenp, loff_t *ppos);
148 static const struct ctl_table trace_sysctl_table[] = {
149 {
150 .procname = "ftrace_dump_on_oops",
151 .data = &ftrace_dump_on_oops,
152 .maxlen = MAX_TRACER_SIZE,
153 .mode = 0644,
154 .proc_handler = proc_dostring,
155 },
156 {
157 .procname = "traceoff_on_warning",
158 .data = &__disable_trace_on_warning,
159 .maxlen = sizeof(__disable_trace_on_warning),
160 .mode = 0644,
161 .proc_handler = proc_dointvec,
162 },
163 {
164 .procname = "tracepoint_printk",
165 .data = &tracepoint_printk,
166 .maxlen = sizeof(tracepoint_printk),
167 .mode = 0644,
168 .proc_handler = tracepoint_printk_sysctl,
169 },
170 };
171
172 static int __init init_trace_sysctls(void)
173 {
174 register_sysctl_init("kernel", trace_sysctl_table);
175 return 0;
176 }
177 subsys_initcall(init_trace_sysctls);
178
179 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
180 /* Map of enums to their values, for "eval_map" file */
181 struct trace_eval_map_head {
182 struct module *mod;
183 unsigned long length;
184 };
185
186 union trace_eval_map_item;
187
188 struct trace_eval_map_tail {
189 /*
190 * "end" is first and points to NULL as it must be different
191 * from "mod" or "eval_string"
192 */
193 union trace_eval_map_item *next;
194 const char *end; /* points to NULL */
195 };
196
197 static DEFINE_MUTEX(trace_eval_mutex);
198
199 /*
200 * The trace_eval_maps are saved in an array with two extra elements,
201 * one at the beginning, and one at the end. The beginning item contains
202 * the count of the saved maps (head.length), and the module they
203 * belong to if not built in (head.mod). The ending item contains a
204 * pointer to the next array of saved eval_map items.
205 */
206 union trace_eval_map_item {
207 struct trace_eval_map map;
208 struct trace_eval_map_head head;
209 struct trace_eval_map_tail tail;
210 };
211
212 static union trace_eval_map_item *trace_eval_maps;
213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
214
215 int tracing_set_tracer(struct trace_array *tr, const char *buf);
216 static void ftrace_trace_userstack(struct trace_array *tr,
217 struct trace_buffer *buffer,
218 unsigned int trace_ctx);
219
220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
221 static char *default_bootup_tracer;
222
223 static bool allocate_snapshot;
224 static bool snapshot_at_boot;
225
226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
227 static int boot_instance_index;
228
229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_snapshot_index;
231
232 static int __init set_cmdline_ftrace(char *str)
233 {
234 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
235 default_bootup_tracer = bootup_tracer_buf;
236 /* We are using ftrace early, expand it */
237 trace_set_ring_buffer_expanded(NULL);
238 return 1;
239 }
240 __setup("ftrace=", set_cmdline_ftrace);
241
242 int ftrace_dump_on_oops_enabled(void)
243 {
244 if (!strcmp("0", ftrace_dump_on_oops))
245 return 0;
246 else
247 return 1;
248 }
249
250 static int __init set_ftrace_dump_on_oops(char *str)
251 {
252 if (!*str) {
253 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
254 return 1;
255 }
256
257 if (*str == ',') {
258 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
259 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
260 return 1;
261 }
262
263 if (*str++ == '=') {
264 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
265 return 1;
266 }
267
268 return 0;
269 }
270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
271
272 static int __init stop_trace_on_warning(char *str)
273 {
274 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
275 __disable_trace_on_warning = 1;
276 return 1;
277 }
278 __setup("traceoff_on_warning", stop_trace_on_warning);
279
280 static int __init boot_alloc_snapshot(char *str)
281 {
282 char *slot = boot_snapshot_info + boot_snapshot_index;
283 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
284 int ret;
285
286 if (str[0] == '=') {
287 str++;
288 if (strlen(str) >= left)
289 return -1;
290
291 ret = snprintf(slot, left, "%s\t", str);
292 boot_snapshot_index += ret;
293 } else {
294 allocate_snapshot = true;
295 /* We also need the main ring buffer expanded */
296 trace_set_ring_buffer_expanded(NULL);
297 }
298 return 1;
299 }
300 __setup("alloc_snapshot", boot_alloc_snapshot);
301
302
303 static int __init boot_snapshot(char *str)
304 {
305 snapshot_at_boot = true;
306 boot_alloc_snapshot(str);
307 return 1;
308 }
309 __setup("ftrace_boot_snapshot", boot_snapshot);
310
311
312 static int __init boot_instance(char *str)
313 {
314 char *slot = boot_instance_info + boot_instance_index;
315 int left = sizeof(boot_instance_info) - boot_instance_index;
316 int ret;
317
318 if (strlen(str) >= left)
319 return -1;
320
321 ret = snprintf(slot, left, "%s\t", str);
322 boot_instance_index += ret;
323
324 return 1;
325 }
326 __setup("trace_instance=", boot_instance);
327
328
329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
330
331 static int __init set_trace_boot_options(char *str)
332 {
333 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
334 return 1;
335 }
336 __setup("trace_options=", set_trace_boot_options);
337
338 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
339 static char *trace_boot_clock __initdata;
340
341 static int __init set_trace_boot_clock(char *str)
342 {
343 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
344 trace_boot_clock = trace_boot_clock_buf;
345 return 1;
346 }
347 __setup("trace_clock=", set_trace_boot_clock);
348
349 static int __init set_tracepoint_printk(char *str)
350 {
351 /* Ignore the "tp_printk_stop_on_boot" param */
352 if (*str == '_')
353 return 0;
354
355 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
356 tracepoint_printk = 1;
357 return 1;
358 }
359 __setup("tp_printk", set_tracepoint_printk);
360
361 static int __init set_tracepoint_printk_stop(char *str)
362 {
363 tracepoint_printk_stop_on_boot = true;
364 return 1;
365 }
366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
367
368 static int __init set_traceoff_after_boot(char *str)
369 {
370 traceoff_after_boot = true;
371 return 1;
372 }
373 __setup("traceoff_after_boot", set_traceoff_after_boot);
374
375 unsigned long long ns2usecs(u64 nsec)
376 {
377 nsec += 500;
378 do_div(nsec, 1000);
379 return nsec;
380 }
381
382 static void
383 trace_process_export(struct trace_export *export,
384 struct ring_buffer_event *event, int flag)
385 {
386 struct trace_entry *entry;
387 unsigned int size = 0;
388
389 if (export->flags & flag) {
390 entry = ring_buffer_event_data(event);
391 size = ring_buffer_event_length(event);
392 export->write(export, entry, size);
393 }
394 }
395
396 static DEFINE_MUTEX(ftrace_export_lock);
397
398 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
399
400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
403
404 static inline void ftrace_exports_enable(struct trace_export *export)
405 {
406 if (export->flags & TRACE_EXPORT_FUNCTION)
407 static_branch_inc(&trace_function_exports_enabled);
408
409 if (export->flags & TRACE_EXPORT_EVENT)
410 static_branch_inc(&trace_event_exports_enabled);
411
412 if (export->flags & TRACE_EXPORT_MARKER)
413 static_branch_inc(&trace_marker_exports_enabled);
414 }
415
416 static inline void ftrace_exports_disable(struct trace_export *export)
417 {
418 if (export->flags & TRACE_EXPORT_FUNCTION)
419 static_branch_dec(&trace_function_exports_enabled);
420
421 if (export->flags & TRACE_EXPORT_EVENT)
422 static_branch_dec(&trace_event_exports_enabled);
423
424 if (export->flags & TRACE_EXPORT_MARKER)
425 static_branch_dec(&trace_marker_exports_enabled);
426 }
427
428 static void ftrace_exports(struct ring_buffer_event *event, int flag)
429 {
430 struct trace_export *export;
431
432 guard(preempt_notrace)();
433
434 export = rcu_dereference_raw_check(ftrace_exports_list);
435 while (export) {
436 trace_process_export(export, event, flag);
437 export = rcu_dereference_raw_check(export->next);
438 }
439 }
440
441 static inline void
442 add_trace_export(struct trace_export **list, struct trace_export *export)
443 {
444 rcu_assign_pointer(export->next, *list);
445 /*
446 * We are entering export into the list but another
447 * CPU might be walking that list. We need to make sure
448 * the export->next pointer is valid before another CPU sees
449 * the export pointer inserted into the list.
450 */
451 rcu_assign_pointer(*list, export);
452 }
453
454 static inline int
455 rm_trace_export(struct trace_export **list, struct trace_export *export)
456 {
457 struct trace_export **p;
458
459 for (p = list; *p != NULL; p = &(*p)->next)
460 if (*p == export)
461 break;
462
463 if (*p != export)
464 return -1;
465
466 rcu_assign_pointer(*p, (*p)->next);
467
468 return 0;
469 }
470
471 static inline void
472 add_ftrace_export(struct trace_export **list, struct trace_export *export)
473 {
474 ftrace_exports_enable(export);
475
476 add_trace_export(list, export);
477 }
478
479 static inline int
480 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
481 {
482 int ret;
483
484 ret = rm_trace_export(list, export);
485 ftrace_exports_disable(export);
486
487 return ret;
488 }
489
490 int register_ftrace_export(struct trace_export *export)
491 {
492 if (WARN_ON_ONCE(!export->write))
493 return -1;
494
495 guard(mutex)(&ftrace_export_lock);
496
497 add_ftrace_export(&ftrace_exports_list, export);
498
499 return 0;
500 }
501 EXPORT_SYMBOL_GPL(register_ftrace_export);
502
503 int unregister_ftrace_export(struct trace_export *export)
504 {
505 guard(mutex)(&ftrace_export_lock);
506 return rm_ftrace_export(&ftrace_exports_list, export);
507 }
508 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
509
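/*
 * Example use of the export API above (an illustrative sketch only; the
 * example_* names are hypothetical and not part of this file). A client
 * supplies a write() callback plus a mask of TRACE_EXPORT_* flags and then
 * registers the export to receive a copy of every matching trace entry:
 *
 *	static void example_export_write(struct trace_export *export,
 *					 const void *entry, unsigned int size)
 *	{
 *		// forward the @size bytes at @entry to an external sink
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&example_export);	// on setup
 *	unregister_ftrace_export(&example_export);	// on teardown
 */
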
510 /* trace_flags holds trace_options default values */
511 #define TRACE_DEFAULT_FLAGS \
512 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \
513 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \
514 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \
515 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \
516 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \
517 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \
518 TRACE_ITER(COPY_MARKER))
519
520 /* trace_options that are only supported by global_trace */
521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \
522 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \
523 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
524
525 /* trace_flags that are default zero for instances */
526 #define ZEROED_TRACE_FLAGS \
527 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
528 TRACE_ITER(COPY_MARKER))
529
530 /*
531 * The global_trace is the descriptor that holds the top-level tracing
532 * buffers for the live tracing.
533 */
534 static struct trace_array global_trace = {
535 .trace_flags = TRACE_DEFAULT_FLAGS,
536 };
537
538 static struct trace_array *printk_trace = &global_trace;
539
540 /* List of trace_arrays interested in the top level trace_marker */
541 static LIST_HEAD(marker_copies);
542
543 static __always_inline bool printk_binsafe(struct trace_array *tr)
544 {
545 /*
546 * The binary format of trace_printk() can cause a crash if used
547 * by a buffer from another boot. Force the use of the
548 * non-binary version of trace_printk() if the trace_printk
549 * buffer is a boot mapped ring buffer.
550 */
551 return !(tr->flags & TRACE_ARRAY_FL_BOOT);
552 }
553
554 static void update_printk_trace(struct trace_array *tr)
555 {
556 if (printk_trace == tr)
557 return;
558
559 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
560 printk_trace = tr;
561 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
562 }
563
564 /* Returns true if the status of tr changed */
565 static bool update_marker_trace(struct trace_array *tr, int enabled)
566 {
567 lockdep_assert_held(&event_mutex);
568
569 if (enabled) {
570 if (!list_empty(&tr->marker_list))
571 return false;
572
573 list_add_rcu(&tr->marker_list, &marker_copies);
574 tr->trace_flags |= TRACE_ITER(COPY_MARKER);
575 return true;
576 }
577
578 if (list_empty(&tr->marker_list))
579 return false;
580
581 list_del_init(&tr->marker_list);
582 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
583 return true;
584 }
585
586 void trace_set_ring_buffer_expanded(struct trace_array *tr)
587 {
588 if (!tr)
589 tr = &global_trace;
590 tr->ring_buffer_expanded = true;
591 }
592
593 LIST_HEAD(ftrace_trace_arrays);
594
595 int trace_array_get(struct trace_array *this_tr)
596 {
597 struct trace_array *tr;
598
599 guard(mutex)(&trace_types_lock);
600 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
601 if (tr == this_tr) {
602 tr->ref++;
603 return 0;
604 }
605 }
606
607 return -ENODEV;
608 }
609
610 static void __trace_array_put(struct trace_array *this_tr)
611 {
612 WARN_ON(!this_tr->ref);
613 this_tr->ref--;
614 }
615
616 /**
617 * trace_array_put - Decrement the reference counter for this trace array.
618 * @this_tr : pointer to the trace array
619 *
620 * NOTE: Use this when we no longer need the trace array returned by
621 * trace_array_get_by_name(). This ensures the trace array can be later
622 * destroyed.
623 *
624 */
625 void trace_array_put(struct trace_array *this_tr)
626 {
627 if (!this_tr)
628 return;
629
630 guard(mutex)(&trace_types_lock);
631 __trace_array_put(this_tr);
632 }
633 EXPORT_SYMBOL_GPL(trace_array_put);
634
635 int tracing_check_open_get_tr(struct trace_array *tr)
636 {
637 int ret;
638
639 ret = security_locked_down(LOCKDOWN_TRACEFS);
640 if (ret)
641 return ret;
642
643 if (tracing_disabled)
644 return -ENODEV;
645
646 if (tr && trace_array_get(tr) < 0)
647 return -ENODEV;
648
649 return 0;
650 }
651
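/*
 * Example pairing of the reference helpers above (an illustrative sketch
 * only; the example_* names are hypothetical). A tracefs open callback
 * takes a reference on the trace array and the matching release callback
 * drops it, which is the pattern used by the file operations later in
 * this file:
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int example_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(inode->i_private);
 *		return 0;
 *	}
 */
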
652 /**
653 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
654 * @filtered_pids: The list of pids to check
655 * @search_pid: The PID to find in @filtered_pids
656 *
657 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
658 */
659 bool
660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
661 {
662 return trace_pid_list_is_set(filtered_pids, search_pid);
663 }
664
665 /**
666 * trace_ignore_this_task - should a task be ignored for tracing
667 * @filtered_pids: The list of pids to check
668 * @filtered_no_pids: The list of pids not to be traced
669 * @task: The task that should be ignored if not filtered
670 *
671 * Checks if @task should be traced or not from @filtered_pids.
672 * Returns true if @task should *NOT* be traced.
673 * Returns false if @task should be traced.
674 */
675 bool
676 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
677 struct trace_pid_list *filtered_no_pids,
678 struct task_struct *task)
679 {
680 /*
681 * If filtered_no_pids is not empty, and the task's pid is listed
682 * in filtered_no_pids, then return true.
683 * Otherwise, if filtered_pids is empty, that means we can
684 * trace all tasks. If it has content, then only trace pids
685 * within filtered_pids.
686 */
687
688 return (filtered_pids &&
689 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
690 (filtered_no_pids &&
691 trace_find_filtered_pid(filtered_no_pids, task->pid));
692 }
693
694 /**
695 * trace_filter_add_remove_task - Add or remove a task from a pid_list
696 * @pid_list: The list to modify
697 * @self: The current task for fork or NULL for exit
698 * @task: The task to add or remove
699 *
700 * When adding a task, if @self is defined, the task is only added if @self
701 * is also included in @pid_list. This happens on fork and tasks should
702 * only be added when the parent is listed. If @self is NULL, then the
703 * @task pid will be removed from the list, which would happen on exit
704 * of a task.
705 */
706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
707 struct task_struct *self,
708 struct task_struct *task)
709 {
710 if (!pid_list)
711 return;
712
713 /* For forks, we only add if the forking task is listed */
714 if (self) {
715 if (!trace_find_filtered_pid(pid_list, self->pid))
716 return;
717 }
718
719 /* "self" is set for forks, and NULL for exits */
720 if (self)
721 trace_pid_list_set(pid_list, task->pid);
722 else
723 trace_pid_list_clear(pid_list, task->pid);
724 }
725
726 /**
727 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
728 * @pid_list: The pid list to show
729 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
730 * @pos: The position of the file
731 *
732 * This is used by the seq_file "next" operation to iterate the pids
733 * listed in a trace_pid_list structure.
734 *
735 * Returns the pid+1 as we want to display pid of zero, but NULL would
736 * stop the iteration.
737 */
738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
739 {
740 long pid = (unsigned long)v;
741 unsigned int next;
742
743 (*pos)++;
744
745 /* pid already is +1 of the actual previous bit */
746 if (trace_pid_list_next(pid_list, pid, &next) < 0)
747 return NULL;
748
749 pid = next;
750
751 /* Return pid + 1 to allow zero to be represented */
752 return (void *)(pid + 1);
753 }
754
755 /**
756 * trace_pid_start - Used for seq_file to start reading pid lists
757 * @pid_list: The pid list to show
758 * @pos: The position of the file
759 *
760 * This is used by seq_file "start" operation to start the iteration
761 * of listing pids.
762 *
763 * Returns the pid+1 as we want to display pid of zero, but NULL would
764 * stop the iteration.
765 */
766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
767 {
768 unsigned long pid;
769 unsigned int first;
770 loff_t l = 0;
771
772 if (trace_pid_list_first(pid_list, &first) < 0)
773 return NULL;
774
775 pid = first;
776
777 /* Return pid + 1 so that zero can be the exit value */
778 for (pid++; pid && l < *pos;
779 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
780 ;
781 return (void *)pid;
782 }
783
784 /**
785 * trace_pid_show - show the current pid in seq_file processing
786 * @m: The seq_file structure to write into
787 * @v: A void pointer of the pid (+1) value to display
788 *
789 * Can be directly used by seq_file operations to display the current
790 * pid value.
791 */
792 int trace_pid_show(struct seq_file *m, void *v)
793 {
794 unsigned long pid = (unsigned long)v - 1;
795
796 seq_printf(m, "%lu\n", pid);
797 return 0;
798 }
799
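/*
 * Example of wiring the pid seq_file helpers above together (an
 * illustrative sketch only; the example_* names are hypothetical, and a
 * real user fetches the pid_list from its own private data under proper
 * RCU/mutex protection):
 *
 *	static void *example_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(m->private, pos);
 *	}
 *
 *	static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void example_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pids_seq_ops = {
 *		.start	= example_pids_start,
 *		.next	= example_pids_next,
 *		.stop	= example_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */
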
800 /* 128 should be much more than enough */
801 #define PID_BUF_SIZE 127
802
803 int trace_pid_write(struct trace_pid_list *filtered_pids,
804 struct trace_pid_list **new_pid_list,
805 const char __user *ubuf, size_t cnt)
806 {
807 struct trace_pid_list *pid_list;
808 struct trace_parser parser;
809 unsigned long val;
810 int nr_pids = 0;
811 ssize_t read = 0;
812 ssize_t ret;
813 loff_t pos;
814 pid_t pid;
815
816 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
817 return -ENOMEM;
818
819 /*
820 * Always recreate a new array. The write is an all or nothing
821 * operation. Always create a new array when adding new pids by
822 * the user. If the operation fails, then the current list is
823 * not modified.
824 */
825 pid_list = trace_pid_list_alloc();
826 if (!pid_list) {
827 trace_parser_put(&parser);
828 return -ENOMEM;
829 }
830
831 if (filtered_pids) {
832 /* copy the current bits to the new max */
833 ret = trace_pid_list_first(filtered_pids, &pid);
834 while (!ret) {
835 ret = trace_pid_list_set(pid_list, pid);
836 if (ret < 0)
837 goto out;
838
839 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
840 nr_pids++;
841 }
842 }
843
844 ret = 0;
845 while (cnt > 0) {
846
847 pos = 0;
848
849 ret = trace_get_user(&parser, ubuf, cnt, &pos);
850 if (ret < 0)
851 break;
852
853 read += ret;
854 ubuf += ret;
855 cnt -= ret;
856
857 if (!trace_parser_loaded(&parser))
858 break;
859
860 ret = -EINVAL;
861 if (kstrtoul(parser.buffer, 0, &val))
862 break;
863
864 pid = (pid_t)val;
865
866 if (trace_pid_list_set(pid_list, pid) < 0) {
867 ret = -1;
868 break;
869 }
870 nr_pids++;
871
872 trace_parser_clear(&parser);
873 ret = 0;
874 }
875 out:
876 trace_parser_put(&parser);
877
878 if (ret < 0) {
879 trace_pid_list_free(pid_list);
880 return ret;
881 }
882
883 if (!nr_pids) {
884 /* Cleared the list of pids */
885 trace_pid_list_free(pid_list);
886 pid_list = NULL;
887 }
888
889 *new_pid_list = pid_list;
890
891 return read;
892 }
893
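/*
 * Example caller of trace_pid_write() (an illustrative sketch only; the
 * example_* names are hypothetical). A write handler builds the new list,
 * publishes it with rcu_assign_pointer() and frees the old one after a
 * grace period, roughly as the ftrace and event pid files do:
 *
 *	struct trace_pid_list *pid_list;
 *	ssize_t ret;
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(example_owner->pid_list, pid_list);
 *	synchronize_rcu();
 *	trace_pid_list_free(filtered_pids);
 *
 *	if (ret > 0)
 *		*ppos += ret;
 *	return ret;
 */
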
894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
895 {
896 u64 ts;
897
898 /* Early boot up does not have a buffer yet */
899 if (!buf->buffer)
900 return trace_clock_local();
901
902 ts = ring_buffer_time_stamp(buf->buffer);
903 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
904
905 return ts;
906 }
907
908 u64 ftrace_now(int cpu)
909 {
910 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
911 }
912
913 /**
914 * tracing_is_enabled - Show if global_trace has been enabled
915 *
916 * Shows if the global trace has been enabled or not. It uses the
917 * mirror flag "buffer_disabled", which is meant for fast paths such as
918 * the irqsoff tracer. But it may be inaccurate due to races. If you
919 * need to know the accurate state, use tracing_is_on() which is a little
920 * slower, but accurate.
921 */
922 int tracing_is_enabled(void)
923 {
924 /*
925 * For quick access (irqsoff uses this in fast path), just
926 * return the mirror variable of the state of the ring buffer.
927 * It's a little racy, but we don't really care.
928 */
929 return !global_trace.buffer_disabled;
930 }
931
932 /*
933 * trace_buf_size is the size in bytes that is allocated
934 * for a buffer. Note, the number of bytes is always rounded
935 * to page size.
936 *
937 * This number is purposely set to a low value of 16384 entries.
938 * If a dump on oops happens, it is much appreciated not to have
939 * to wait for all that output. In any case, this is configurable
940 * at both boot time and run time.
941 */
942 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
943
944 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
945
946 /* trace_types holds a link list of available tracers. */
947 static struct tracer *trace_types __read_mostly;
948
949 /*
950 * trace_types_lock is used to protect the trace_types list.
951 */
952 DEFINE_MUTEX(trace_types_lock);
953
954 /*
955 * serialize the access of the ring buffer
956 *
957 * The ring buffer serializes readers, but that is only low level protection.
958 * The validity of the events (returned by ring_buffer_peek(), etc.)
959 * is not protected by the ring buffer.
960 *
961 * The content of events may become garbage if we allow other processes to
962 * consume these events concurrently:
963 * A) the page of the consumed events may become a normal page
964 * (not a reader page) in the ring buffer, and this page will be rewritten
965 * by the events producer.
966 * B) The page of the consumed events may become a page for splice_read,
967 * and this page will be returned to the system.
968 *
969 * These primitives allow multiple processes to access different cpu ring
970 * buffers concurrently.
971 *
972 * These primitives don't distinguish read-only from read-consume access.
973 * Multiple read-only accesses are also serialized.
974 */
975
976 #ifdef CONFIG_SMP
977 static DECLARE_RWSEM(all_cpu_access_lock);
978 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
979
980 static inline void trace_access_lock(int cpu)
981 {
982 if (cpu == RING_BUFFER_ALL_CPUS) {
983 /* gain it for accessing the whole ring buffer. */
984 down_write(&all_cpu_access_lock);
985 } else {
986 /* gain it for accessing a cpu ring buffer. */
987
988 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
989 down_read(&all_cpu_access_lock);
990
991 /* Secondly block other access to this @cpu ring buffer. */
992 mutex_lock(&per_cpu(cpu_access_lock, cpu));
993 }
994 }
995
996 static inline void trace_access_unlock(int cpu)
997 {
998 if (cpu == RING_BUFFER_ALL_CPUS) {
999 up_write(&all_cpu_access_lock);
1000 } else {
1001 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1002 up_read(&all_cpu_access_lock);
1003 }
1004 }
1005
1006 static inline void trace_access_lock_init(void)
1007 {
1008 int cpu;
1009
1010 for_each_possible_cpu(cpu)
1011 mutex_init(&per_cpu(cpu_access_lock, cpu));
1012 }
1013
1014 #else
1015
1016 static DEFINE_MUTEX(access_lock);
1017
1018 static inline void trace_access_lock(int cpu)
1019 {
1020 (void)cpu;
1021 mutex_lock(&access_lock);
1022 }
1023
1024 static inline void trace_access_unlock(int cpu)
1025 {
1026 (void)cpu;
1027 mutex_unlock(&access_lock);
1028 }
1029
1030 static inline void trace_access_lock_init(void)
1031 {
1032 }
1033
1034 #endif
1035
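/*
 * Example reader-side use of the access locks above (an illustrative
 * sketch only). A consuming reader brackets its accesses like this, as
 * the trace_pipe read path later in this file does, where iter->cpu_file
 * is either a single CPU or RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(iter->cpu_file);
 *	// ... consume events for iter->cpu_file ...
 *	trace_access_unlock(iter->cpu_file);
 */
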
1036 #ifdef CONFIG_STACKTRACE
1037 static void __ftrace_trace_stack(struct trace_array *tr,
1038 struct trace_buffer *buffer,
1039 unsigned int trace_ctx,
1040 int skip, struct pt_regs *regs);
1041 static inline void ftrace_trace_stack(struct trace_array *tr,
1042 struct trace_buffer *buffer,
1043 unsigned int trace_ctx,
1044 int skip, struct pt_regs *regs);
1045
1046 #else
1047 static inline void __ftrace_trace_stack(struct trace_array *tr,
1048 struct trace_buffer *buffer,
1049 unsigned int trace_ctx,
1050 int skip, struct pt_regs *regs)
1051 {
1052 }
1053 static inline void ftrace_trace_stack(struct trace_array *tr,
1054 struct trace_buffer *buffer,
1055 unsigned long trace_ctx,
1056 int skip, struct pt_regs *regs)
1057 {
1058 }
1059
1060 #endif
1061
1062 static __always_inline void
1063 trace_event_setup(struct ring_buffer_event *event,
1064 int type, unsigned int trace_ctx)
1065 {
1066 struct trace_entry *ent = ring_buffer_event_data(event);
1067
1068 tracing_generic_entry_update(ent, type, trace_ctx);
1069 }
1070
1071 static __always_inline struct ring_buffer_event *
1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1073 int type,
1074 unsigned long len,
1075 unsigned int trace_ctx)
1076 {
1077 struct ring_buffer_event *event;
1078
1079 event = ring_buffer_lock_reserve(buffer, len);
1080 if (event != NULL)
1081 trace_event_setup(event, type, trace_ctx);
1082
1083 return event;
1084 }
1085
1086 void tracer_tracing_on(struct trace_array *tr)
1087 {
1088 if (tr->array_buffer.buffer)
1089 ring_buffer_record_on(tr->array_buffer.buffer);
1090 /*
1091 * This flag is looked at when buffers haven't been allocated
1092 * yet, or by some tracers (like irqsoff), that just want to
1093 * know if the ring buffer has been disabled, but it can handle
1094 * races where it gets disabled while we still do a record.
1095 * As the check is in the fast path of the tracers, it is more
1096 * important to be fast than accurate.
1097 */
1098 tr->buffer_disabled = 0;
1099 }
1100
1101 /**
1102 * tracing_on - enable tracing buffers
1103 *
1104 * This function enables tracing buffers that may have been
1105 * disabled with tracing_off.
1106 */
1107 void tracing_on(void)
1108 {
1109 tracer_tracing_on(&global_trace);
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_on);
1112
1113
1114 static __always_inline void
1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1116 {
1117 __this_cpu_write(trace_taskinfo_save, true);
1118
1119 /* If this is the temp buffer, we need to commit fully */
1120 if (this_cpu_read(trace_buffered_event) == event) {
1121 /* Length is in event->array[0] */
1122 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1123 /* Release the temp buffer */
1124 this_cpu_dec(trace_buffered_event_cnt);
1125 /* ring_buffer_unlock_commit() enables preemption */
1126 preempt_enable_notrace();
1127 } else
1128 ring_buffer_unlock_commit(buffer);
1129 }
1130
1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1132 const char *str, int size)
1133 {
1134 struct ring_buffer_event *event;
1135 struct trace_buffer *buffer;
1136 struct print_entry *entry;
1137 unsigned int trace_ctx;
1138 int alloc;
1139
1140 if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1141 return 0;
1142
1143 if (unlikely(tracing_selftest_running && tr == &global_trace))
1144 return 0;
1145
1146 if (unlikely(tracing_disabled))
1147 return 0;
1148
1149 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1150
1151 trace_ctx = tracing_gen_ctx();
1152 buffer = tr->array_buffer.buffer;
1153 guard(ring_buffer_nest)(buffer);
1154 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1155 trace_ctx);
1156 if (!event)
1157 return 0;
1158
1159 entry = ring_buffer_event_data(event);
1160 entry->ip = ip;
1161
1162 memcpy(&entry->buf, str, size);
1163
1164 /* Add a newline if necessary */
1165 if (entry->buf[size - 1] != '\n') {
1166 entry->buf[size] = '\n';
1167 entry->buf[size + 1] = '\0';
1168 } else
1169 entry->buf[size] = '\0';
1170
1171 __buffer_unlock_commit(buffer, event);
1172 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1173 return size;
1174 }
1175 EXPORT_SYMBOL_GPL(__trace_array_puts);
1176
1177 /**
1178 * __trace_puts - write a constant string into the trace buffer.
1179 * @ip: The address of the caller
1180 * @str: The constant string to write
1181 * @size: The size of the string.
1182 */
1183 int __trace_puts(unsigned long ip, const char *str, int size)
1184 {
1185 return __trace_array_puts(printk_trace, ip, str, size);
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_puts);
1188
1189 /**
1190 * __trace_bputs - write the pointer to a constant string into trace buffer
1191 * @ip: The address of the caller
1192 * @str: The constant string to write to the buffer to
1193 */
1194 int __trace_bputs(unsigned long ip, const char *str)
1195 {
1196 struct trace_array *tr = READ_ONCE(printk_trace);
1197 struct ring_buffer_event *event;
1198 struct trace_buffer *buffer;
1199 struct bputs_entry *entry;
1200 unsigned int trace_ctx;
1201 int size = sizeof(struct bputs_entry);
1202
1203 if (!printk_binsafe(tr))
1204 return __trace_puts(ip, str, strlen(str));
1205
1206 if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
1207 return 0;
1208
1209 if (unlikely(tracing_selftest_running || tracing_disabled))
1210 return 0;
1211
1212 trace_ctx = tracing_gen_ctx();
1213 buffer = tr->array_buffer.buffer;
1214
1215 guard(ring_buffer_nest)(buffer);
1216 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1217 trace_ctx);
1218 if (!event)
1219 return 0;
1220
1221 entry = ring_buffer_event_data(event);
1222 entry->ip = ip;
1223 entry->str = str;
1224
1225 __buffer_unlock_commit(buffer, event);
1226 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1227
1228 return 1;
1229 }
1230 EXPORT_SYMBOL_GPL(__trace_bputs);
1231
1232 #ifdef CONFIG_TRACER_SNAPSHOT
1233 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1234 void *cond_data)
1235 {
1236 struct tracer *tracer = tr->current_trace;
1237 unsigned long flags;
1238
1239 if (in_nmi()) {
1240 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1241 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1242 return;
1243 }
1244
1245 if (!tr->allocated_snapshot) {
1246 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1247 trace_array_puts(tr, "*** stopping trace here! ***\n");
1248 tracer_tracing_off(tr);
1249 return;
1250 }
1251
1252 /* Note, snapshot can not be used when the tracer uses it */
1253 if (tracer->use_max_tr) {
1254 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1255 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1256 return;
1257 }
1258
1259 if (tr->mapped) {
1260 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1261 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1262 return;
1263 }
1264
1265 local_irq_save(flags);
1266 update_max_tr(tr, current, smp_processor_id(), cond_data);
1267 local_irq_restore(flags);
1268 }
1269
1270 void tracing_snapshot_instance(struct trace_array *tr)
1271 {
1272 tracing_snapshot_instance_cond(tr, NULL);
1273 }
1274
1275 /**
1276 * tracing_snapshot - take a snapshot of the current buffer.
1277 *
1278 * This causes a swap between the snapshot buffer and the current live
1279 * tracing buffer. You can use this to take snapshots of the live
1280 * trace when some condition is triggered, but continue to trace.
1281 *
1282 * Note, make sure to allocate the snapshot with either
1283 * a tracing_snapshot_alloc(), or by doing it manually
1284 * with: echo 1 > /sys/kernel/tracing/snapshot
1285 *
1286 * If the snapshot buffer is not allocated, it will stop tracing.
1287 * Basically making a permanent snapshot.
1288 */
1289 void tracing_snapshot(void)
1290 {
1291 struct trace_array *tr = &global_trace;
1292
1293 tracing_snapshot_instance(tr);
1294 }
1295 EXPORT_SYMBOL_GPL(tracing_snapshot);
1296
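/*
 * Example use (an illustrative sketch only; example_condition() is a
 * hypothetical helper). Allocate the snapshot buffer once from a context
 * that may sleep, then take snapshots when something interesting happens
 * while normal tracing continues:
 *
 *	// during setup (may sleep):
 *	tracing_alloc_snapshot();
 *
 *	// later, in the monitored path:
 *	if (example_condition())
 *		tracing_snapshot();
 */
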
1297 /**
1298 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1299 * @tr: The tracing instance to snapshot
1300 * @cond_data: The data to be tested conditionally, and possibly saved
1301 *
1302 * This is the same as tracing_snapshot() except that the snapshot is
1303 * conditional - the snapshot will only happen if the
1304 * cond_snapshot.update() implementation receiving the cond_data
1305 * returns true, which means that the trace array's cond_snapshot
1306 * update() operation used the cond_data to determine whether the
1307 * snapshot should be taken, and if it was, presumably saved it along
1308 * with the snapshot.
1309 */
1310 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1311 {
1312 tracing_snapshot_instance_cond(tr, cond_data);
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1315
1316 /**
1317 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1318 * @tr: The tracing instance
1319 *
1320 * When the user enables a conditional snapshot using
1321 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1322 * with the snapshot. This accessor is used to retrieve it.
1323 *
1324 * Should not be called from cond_snapshot.update(), since it takes
1325 * the tr->max_lock lock, which the code calling
1326 * cond_snapshot.update() has already taken.
1327 *
1328 * Returns the cond_data associated with the trace array's snapshot.
1329 */
1330 void *tracing_cond_snapshot_data(struct trace_array *tr)
1331 {
1332 void *cond_data = NULL;
1333
1334 local_irq_disable();
1335 arch_spin_lock(&tr->max_lock);
1336
1337 if (tr->cond_snapshot)
1338 cond_data = tr->cond_snapshot->cond_data;
1339
1340 arch_spin_unlock(&tr->max_lock);
1341 local_irq_enable();
1342
1343 return cond_data;
1344 }
1345 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1346
1347 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1348 struct array_buffer *size_buf, int cpu_id);
1349 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1350
1351 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1352 {
1353 int order;
1354 int ret;
1355
1356 if (!tr->allocated_snapshot) {
1357
1358 /* Make the snapshot buffer have the same order as main buffer */
1359 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1360 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1361 if (ret < 0)
1362 return ret;
1363
1364 /* allocate spare buffer */
1365 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1366 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1367 if (ret < 0)
1368 return ret;
1369
1370 tr->allocated_snapshot = true;
1371 }
1372
1373 return 0;
1374 }
1375
1376 static void free_snapshot(struct trace_array *tr)
1377 {
1378 /*
1379 * We don't free the ring buffer; instead, we resize it because
1380 * the max_tr ring buffer has some state (e.g. ring->clock) and
1381 * we want to preserve it.
1382 */
1383 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1384 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1385 set_buffer_entries(&tr->max_buffer, 1);
1386 tracing_reset_online_cpus(&tr->max_buffer);
1387 tr->allocated_snapshot = false;
1388 }
1389
1390 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1391 {
1392 int ret;
1393
1394 lockdep_assert_held(&trace_types_lock);
1395
1396 spin_lock(&tr->snapshot_trigger_lock);
1397 if (tr->snapshot == UINT_MAX || tr->mapped) {
1398 spin_unlock(&tr->snapshot_trigger_lock);
1399 return -EBUSY;
1400 }
1401
1402 tr->snapshot++;
1403 spin_unlock(&tr->snapshot_trigger_lock);
1404
1405 ret = tracing_alloc_snapshot_instance(tr);
1406 if (ret) {
1407 spin_lock(&tr->snapshot_trigger_lock);
1408 tr->snapshot--;
1409 spin_unlock(&tr->snapshot_trigger_lock);
1410 }
1411
1412 return ret;
1413 }
1414
1415 int tracing_arm_snapshot(struct trace_array *tr)
1416 {
1417 guard(mutex)(&trace_types_lock);
1418 return tracing_arm_snapshot_locked(tr);
1419 }
1420
1421 void tracing_disarm_snapshot(struct trace_array *tr)
1422 {
1423 spin_lock(&tr->snapshot_trigger_lock);
1424 if (!WARN_ON(!tr->snapshot))
1425 tr->snapshot--;
1426 spin_unlock(&tr->snapshot_trigger_lock);
1427 }
1428
1429 /**
1430 * tracing_alloc_snapshot - allocate snapshot buffer.
1431 *
1432 * This only allocates the snapshot buffer if it isn't already
1433 * allocated - it doesn't also take a snapshot.
1434 *
1435 * This is meant to be used in cases where the snapshot buffer needs
1436 * to be set up for events that can't sleep but need to be able to
1437 * trigger a snapshot.
1438 */
1439 int tracing_alloc_snapshot(void)
1440 {
1441 struct trace_array *tr = &global_trace;
1442 int ret;
1443
1444 ret = tracing_alloc_snapshot_instance(tr);
1445 WARN_ON(ret < 0);
1446
1447 return ret;
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1450
1451 /**
1452 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1453 *
1454 * This is similar to tracing_snapshot(), but it will allocate the
1455 * snapshot buffer if it isn't already allocated. Use this only
1456 * where it is safe to sleep, as the allocation may sleep.
1457 *
1458 * This causes a swap between the snapshot buffer and the current live
1459 * tracing buffer. You can use this to take snapshots of the live
1460 * trace when some condition is triggered, but continue to trace.
1461 */
1462 void tracing_snapshot_alloc(void)
1463 {
1464 int ret;
1465
1466 ret = tracing_alloc_snapshot();
1467 if (ret < 0)
1468 return;
1469
1470 tracing_snapshot();
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1473
1474 /**
1475 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1476 * @tr: The tracing instance
1477 * @cond_data: User data to associate with the snapshot
1478 * @update: Implementation of the cond_snapshot update function
1479 *
1480 * Check whether the conditional snapshot for the given instance has
1481 * already been enabled, or if the current tracer is already using a
1482 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1483 * save the cond_data and update function inside.
1484 *
1485 * Returns 0 if successful, error otherwise.
1486 */
1487 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1488 cond_update_fn_t update)
1489 {
1490 struct cond_snapshot *cond_snapshot __free(kfree) =
1491 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1492 int ret;
1493
1494 if (!cond_snapshot)
1495 return -ENOMEM;
1496
1497 cond_snapshot->cond_data = cond_data;
1498 cond_snapshot->update = update;
1499
1500 guard(mutex)(&trace_types_lock);
1501
1502 if (tr->current_trace->use_max_tr)
1503 return -EBUSY;
1504
1505 /*
1506 * The cond_snapshot can only change to NULL without the
1507 * trace_types_lock. We don't care if we race with it going
1508 * to NULL, but we want to make sure that it's not set to
1509 * something other than NULL when we get here, which we can
1510 * do safely with only holding the trace_types_lock and not
1511 * having to take the max_lock.
1512 */
1513 if (tr->cond_snapshot)
1514 return -EBUSY;
1515
1516 ret = tracing_arm_snapshot_locked(tr);
1517 if (ret)
1518 return ret;
1519
1520 local_irq_disable();
1521 arch_spin_lock(&tr->max_lock);
1522 tr->cond_snapshot = no_free_ptr(cond_snapshot);
1523 arch_spin_unlock(&tr->max_lock);
1524 local_irq_enable();
1525
1526 return 0;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1529
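/*
 * Example use of conditional snapshots (an illustrative sketch only; the
 * example_* names are hypothetical). The update() callback receives the
 * cond_data passed to tracing_snapshot_cond() and decides whether that
 * call really swaps the buffers; the cond_data registered here can later
 * be read back with tracing_cond_snapshot_data():
 *
 *	static struct example_state example_data;
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct example_state *s = cond_data;
 *
 *		return s->hits++ == 0;	// only snapshot on the first hit
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &example_data, example_update);
 *	// ... in the monitored path:
 *	tracing_snapshot_cond(tr, &example_data);
 *	// ... on teardown:
 *	tracing_snapshot_cond_disable(tr);
 */
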
1530 /**
1531 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1532 * @tr: The tracing instance
1533 *
1534 * Check whether the conditional snapshot for the given instance is
1535 * enabled; if so, free the cond_snapshot associated with it,
1536 * otherwise return -EINVAL.
1537 *
1538 * Returns 0 if successful, error otherwise.
1539 */
1540 int tracing_snapshot_cond_disable(struct trace_array *tr)
1541 {
1542 int ret = 0;
1543
1544 local_irq_disable();
1545 arch_spin_lock(&tr->max_lock);
1546
1547 if (!tr->cond_snapshot)
1548 ret = -EINVAL;
1549 else {
1550 kfree(tr->cond_snapshot);
1551 tr->cond_snapshot = NULL;
1552 }
1553
1554 arch_spin_unlock(&tr->max_lock);
1555 local_irq_enable();
1556
1557 tracing_disarm_snapshot(tr);
1558
1559 return ret;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #else
1563 void tracing_snapshot(void)
1564 {
1565 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1566 }
1567 EXPORT_SYMBOL_GPL(tracing_snapshot);
1568 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1569 {
1570 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1571 }
1572 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1573 int tracing_alloc_snapshot(void)
1574 {
1575 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1576 return -ENODEV;
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1579 void tracing_snapshot_alloc(void)
1580 {
1581 /* Give warning */
1582 tracing_snapshot();
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1585 void *tracing_cond_snapshot_data(struct trace_array *tr)
1586 {
1587 return NULL;
1588 }
1589 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1590 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1591 {
1592 return -ENODEV;
1593 }
1594 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1595 int tracing_snapshot_cond_disable(struct trace_array *tr)
1596 {
1597 return false;
1598 }
1599 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1600 #define free_snapshot(tr) do { } while (0)
1601 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1602 #endif /* CONFIG_TRACER_SNAPSHOT */
1603
1604 void tracer_tracing_off(struct trace_array *tr)
1605 {
1606 if (tr->array_buffer.buffer)
1607 ring_buffer_record_off(tr->array_buffer.buffer);
1608 /*
1609 * This flag is looked at when buffers haven't been allocated
1610 * yet, or by some tracers (like irqsoff), that just want to
1611 * know if the ring buffer has been disabled, but it can handle
1612 * races where it gets disabled while we still do a record.
1613 * As the check is in the fast path of the tracers, it is more
1614 * important to be fast than accurate.
1615 */
1616 tr->buffer_disabled = 1;
1617 }
1618
1619 /**
1620 * tracer_tracing_disable() - temporarily disable the buffer from being written to
1621 * @tr: The trace array to disable its buffer for
1622 *
1623 * Expects tracer_tracing_enable() to re-enable tracing.
1624 * The difference between this and tracer_tracing_off() is that this
1625 * is a counter and can nest, whereas tracer_tracing_off() can
1626 * be called multiple times and a single tracer_tracing_on() will
1627 * enable it.
1628 */
1629 void tracer_tracing_disable(struct trace_array *tr)
1630 {
1631 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1632 return;
1633
1634 ring_buffer_record_disable(tr->array_buffer.buffer);
1635 }
1636
1637 /**
1638 * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
1639 * @tr: The trace array that had tracer_tracing_disable() called on it
1640 *
1641 * This is called after tracer_tracing_disable() has been called on @tr,
1642 * when it's safe to re-enable tracing.
1643 */
1644 void tracer_tracing_enable(struct trace_array *tr)
1645 {
1646 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1647 return;
1648
1649 ring_buffer_record_enable(tr->array_buffer.buffer);
1650 }
1651
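/*
 * Example pairing (an illustrative sketch only). Unlike the off/on calls
 * below, the disable/enable pair above is counted and therefore nests, so
 * it is safe to use around a region that may itself disable and re-enable
 * the buffer:
 *
 *	tracer_tracing_disable(tr);
 *	// ... operate on the buffer with no new writes coming in ...
 *	tracer_tracing_enable(tr);
 */
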
1652 /**
1653 * tracing_off - turn off tracing buffers
1654 *
1655 * This function stops the tracing buffers from recording data.
1656 * It does not disable any overhead the tracers themselves may
1657 * be causing. This function simply causes all recording to
1658 * the ring buffers to fail.
1659 */
1660 void tracing_off(void)
1661 {
1662 tracer_tracing_off(&global_trace);
1663 }
1664 EXPORT_SYMBOL_GPL(tracing_off);
1665
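/*
 * Example use (an illustrative sketch only; the example_* names are
 * hypothetical). tracing_on()/tracing_off() are handy for freezing the
 * ring buffer around a suspected bad code path so the interesting events
 * are not overwritten before they can be read from the trace file:
 *
 *	tracing_on();
 *	example_suspect_code();
 *	if (example_detected_problem())
 *		tracing_off();	// keep the trace that led up to it
 */
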
1666 void disable_trace_on_warning(void)
1667 {
1668 if (__disable_trace_on_warning) {
1669 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1670 "Disabling tracing due to warning\n");
1671 tracing_off();
1672 }
1673 }
1674
1675 /**
1676 * tracer_tracing_is_on - show real state of ring buffer enabled
1677 * @tr : the trace array to know if ring buffer is enabled
1678 *
1679 * Shows real state of the ring buffer if it is enabled or not.
1680 */
1681 bool tracer_tracing_is_on(struct trace_array *tr)
1682 {
1683 if (tr->array_buffer.buffer)
1684 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1685 return !tr->buffer_disabled;
1686 }
1687
1688 /**
1689 * tracing_is_on - show state of ring buffers enabled
1690 */
1691 int tracing_is_on(void)
1692 {
1693 return tracer_tracing_is_on(&global_trace);
1694 }
1695 EXPORT_SYMBOL_GPL(tracing_is_on);
1696
1697 static int __init set_buf_size(char *str)
1698 {
1699 unsigned long buf_size;
1700
1701 if (!str)
1702 return 0;
1703 buf_size = memparse(str, &str);
1704 /*
1705 * nr_entries can not be zero and the startup
1706 * tests require some buffer space. Therefore
1707 * ensure we have at least 4096 bytes of buffer.
1708 */
1709 trace_buf_size = max(4096UL, buf_size);
1710 return 1;
1711 }
1712 __setup("trace_buf_size=", set_buf_size);
1713
1714 static int __init set_tracing_thresh(char *str)
1715 {
1716 unsigned long threshold;
1717 int ret;
1718
1719 if (!str)
1720 return 0;
1721 ret = kstrtoul(str, 0, &threshold);
1722 if (ret < 0)
1723 return 0;
1724 tracing_thresh = threshold * 1000;
1725 return 1;
1726 }
1727 __setup("tracing_thresh=", set_tracing_thresh);
1728
1729 unsigned long nsecs_to_usecs(unsigned long nsecs)
1730 {
1731 return nsecs / 1000;
1732 }
1733
1734 /*
1735 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1736 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1737 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1738 * of strings in the order that the evals (enum) were defined.
1739 */
1740 #undef C
1741 #define C(a, b) b
1742
1743 /* These must match the bit positions in trace_iterator_flags */
1744 static const char *trace_options[] = {
1745 TRACE_FLAGS
1746 NULL
1747 };
1748
1749 static struct {
1750 u64 (*func)(void);
1751 const char *name;
1752 int in_ns; /* is this clock in nanoseconds? */
1753 } trace_clocks[] = {
1754 { trace_clock_local, "local", 1 },
1755 { trace_clock_global, "global", 1 },
1756 { trace_clock_counter, "counter", 0 },
1757 { trace_clock_jiffies, "uptime", 0 },
1758 { trace_clock, "perf", 1 },
1759 { ktime_get_mono_fast_ns, "mono", 1 },
1760 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1761 { ktime_get_boot_fast_ns, "boot", 1 },
1762 { ktime_get_tai_fast_ns, "tai", 1 },
1763 ARCH_TRACE_CLOCKS
1764 };
1765
1766 bool trace_clock_in_ns(struct trace_array *tr)
1767 {
1768 if (trace_clocks[tr->clock_id].in_ns)
1769 return true;
1770
1771 return false;
1772 }
1773
1774 /*
1775 * trace_parser_get_init - gets the buffer for trace parser
1776 */
1777 int trace_parser_get_init(struct trace_parser *parser, int size)
1778 {
1779 memset(parser, 0, sizeof(*parser));
1780
1781 parser->buffer = kmalloc(size, GFP_KERNEL);
1782 if (!parser->buffer)
1783 return 1;
1784
1785 parser->size = size;
1786 return 0;
1787 }
1788
1789 /*
1790 * trace_parser_put - frees the buffer for trace parser
1791 */
1792 void trace_parser_put(struct trace_parser *parser)
1793 {
1794 kfree(parser->buffer);
1795 parser->buffer = NULL;
1796 }
1797
1798 /*
1799 * trace_get_user - reads the user input string separated by space
1800 * (matched by isspace(ch))
1801 *
1802 * For each string found the 'struct trace_parser' is updated,
1803 * and the function returns.
1804 *
1805 * Returns number of bytes read.
1806 *
1807 * See kernel/trace/trace.h for 'struct trace_parser' details.
1808 */
1809 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1810 size_t cnt, loff_t *ppos)
1811 {
1812 char ch;
1813 size_t read = 0;
1814 ssize_t ret;
1815
1816 if (!*ppos)
1817 trace_parser_clear(parser);
1818
1819 ret = get_user(ch, ubuf++);
1820 if (ret)
1821 goto fail;
1822
1823 read++;
1824 cnt--;
1825
1826 /*
1827 * If the parser has not finished with the last write,
1828 * continue reading the user input without skipping spaces.
1829 */
1830 if (!parser->cont) {
1831 /* skip white space */
1832 while (cnt && isspace(ch)) {
1833 ret = get_user(ch, ubuf++);
1834 if (ret)
1835 goto fail;
1836 read++;
1837 cnt--;
1838 }
1839
1840 parser->idx = 0;
1841
1842 /* only spaces were written */
1843 if (isspace(ch) || !ch) {
1844 *ppos += read;
1845 return read;
1846 }
1847 }
1848
1849 /* read the non-space input */
1850 while (cnt && !isspace(ch) && ch) {
1851 if (parser->idx < parser->size - 1)
1852 parser->buffer[parser->idx++] = ch;
1853 else {
1854 ret = -EINVAL;
1855 goto fail;
1856 }
1857
1858 ret = get_user(ch, ubuf++);
1859 if (ret)
1860 goto fail;
1861 read++;
1862 cnt--;
1863 }
1864
1865 /* We either got finished input or we have to wait for another call. */
1866 if (isspace(ch) || !ch) {
1867 parser->buffer[parser->idx] = 0;
1868 parser->cont = false;
1869 } else if (parser->idx < parser->size - 1) {
1870 parser->cont = true;
1871 parser->buffer[parser->idx++] = ch;
1872 /* Make sure the parsed string always terminates with '\0'. */
1873 parser->buffer[parser->idx] = 0;
1874 } else {
1875 ret = -EINVAL;
1876 goto fail;
1877 }
1878
1879 *ppos += read;
1880 return read;
1881 fail:
1882 trace_parser_fail(parser);
1883 return ret;
1884 }
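/*
 * Example (sketch): a typical tracefs ->write() handler drives the parser
 * above in a loop over space-separated tokens. my_handle_token() is
 * hypothetical; trace_parser_get_init(), trace_get_user(),
 * trace_parser_loaded() and trace_parser_put() are the real helpers.
 */
#if 0	/* usage sketch only */
static ssize_t my_tracefs_write(struct file *filp, const char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 256))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read >= 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated token */
		int ret = my_handle_token(parser.buffer);

		if (ret < 0)
			read = ret;
	}

	trace_parser_put(&parser);
	return read;
}
#endif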
1885
1886 /* TODO add a seq_buf_to_buffer() */
1887 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1888 {
1889 int len;
1890
1891 if (trace_seq_used(s) <= s->readpos)
1892 return -EBUSY;
1893
1894 len = trace_seq_used(s) - s->readpos;
1895 if (cnt > len)
1896 cnt = len;
1897 memcpy(buf, s->buffer + s->readpos, cnt);
1898
1899 s->readpos += cnt;
1900 return cnt;
1901 }
1902
1903 unsigned long __read_mostly tracing_thresh;
1904
1905 #ifdef CONFIG_TRACER_MAX_TRACE
1906 static const struct file_operations tracing_max_lat_fops;
1907
1908 #ifdef LATENCY_FS_NOTIFY
1909
1910 static struct workqueue_struct *fsnotify_wq;
1911
1912 static void latency_fsnotify_workfn(struct work_struct *work)
1913 {
1914 struct trace_array *tr = container_of(work, struct trace_array,
1915 fsnotify_work);
1916 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1917 }
1918
1919 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1920 {
1921 struct trace_array *tr = container_of(iwork, struct trace_array,
1922 fsnotify_irqwork);
1923 queue_work(fsnotify_wq, &tr->fsnotify_work);
1924 }
1925
1926 static void trace_create_maxlat_file(struct trace_array *tr,
1927 struct dentry *d_tracer)
1928 {
1929 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1930 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1931 tr->d_max_latency = trace_create_file("tracing_max_latency",
1932 TRACE_MODE_WRITE,
1933 d_tracer, tr,
1934 &tracing_max_lat_fops);
1935 }
1936
1937 __init static int latency_fsnotify_init(void)
1938 {
1939 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1940 WQ_UNBOUND | WQ_HIGHPRI, 0);
1941 if (!fsnotify_wq) {
1942 pr_err("Unable to allocate tr_max_lat_wq\n");
1943 return -ENOMEM;
1944 }
1945 return 0;
1946 }
1947
1948 late_initcall_sync(latency_fsnotify_init);
1949
1950 void latency_fsnotify(struct trace_array *tr)
1951 {
1952 if (!fsnotify_wq)
1953 return;
1954 /*
1955 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1956 * possible that we are called from __schedule() or do_idle(), which
1957 * could cause a deadlock.
1958 */
1959 irq_work_queue(&tr->fsnotify_irqwork);
1960 }
1961
1962 #else /* !LATENCY_FS_NOTIFY */
1963
1964 #define trace_create_maxlat_file(tr, d_tracer) \
1965 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1966 d_tracer, tr, &tracing_max_lat_fops)
1967
1968 #endif
1969
1970 /*
1971 * Copy the new maximum trace into the separate maximum-trace
1972 * structure. (this way the maximum trace is permanently saved,
1973 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1974 */
1975 static void
1976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1977 {
1978 struct array_buffer *trace_buf = &tr->array_buffer;
1979 struct array_buffer *max_buf = &tr->max_buffer;
1980 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1981 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1982
1983 max_buf->cpu = cpu;
1984 max_buf->time_start = data->preempt_timestamp;
1985
1986 max_data->saved_latency = tr->max_latency;
1987 max_data->critical_start = data->critical_start;
1988 max_data->critical_end = data->critical_end;
1989
1990 strscpy(max_data->comm, tsk->comm);
1991 max_data->pid = tsk->pid;
1992 /*
1993 * If tsk == current, then use current_uid(), as that does not use
1994 * RCU. The irq tracer can be called out of RCU scope.
1995 */
1996 if (tsk == current)
1997 max_data->uid = current_uid();
1998 else
1999 max_data->uid = task_uid(tsk);
2000
2001 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2002 max_data->policy = tsk->policy;
2003 max_data->rt_priority = tsk->rt_priority;
2004
2005 /* record this task's comm */
2006 tracing_record_cmdline(tsk);
2007 latency_fsnotify(tr);
2008 }
2009
2010 /**
2011 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2012 * @tr: tracer
2013 * @tsk: the task with the latency
2014 * @cpu: The cpu that initiated the trace.
2015 * @cond_data: User data associated with a conditional snapshot
2016 *
2017 * Flip the buffers between the @tr and the max_tr and record information
2018 * about which task was the cause of this latency.
2019 */
2020 void
2021 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2022 void *cond_data)
2023 {
2024 if (tr->stop_count)
2025 return;
2026
2027 WARN_ON_ONCE(!irqs_disabled());
2028
2029 if (!tr->allocated_snapshot) {
2030 /* Only the nop tracer should hit this when disabling */
2031 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2032 return;
2033 }
2034
2035 arch_spin_lock(&tr->max_lock);
2036
2037 /* Inherit the recordable setting from array_buffer */
2038 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2039 ring_buffer_record_on(tr->max_buffer.buffer);
2040 else
2041 ring_buffer_record_off(tr->max_buffer.buffer);
2042
2043 #ifdef CONFIG_TRACER_SNAPSHOT
2044 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2045 arch_spin_unlock(&tr->max_lock);
2046 return;
2047 }
2048 #endif
2049 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2050
2051 __update_max_tr(tr, tsk, cpu);
2052
2053 arch_spin_unlock(&tr->max_lock);
2054
2055 /* Any waiters on the old snapshot buffer need to wake up */
2056 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2057 }
2058
2059 /**
2060 * update_max_tr_single - only copy one trace over, and reset the rest
2061 * @tr: tracer
2062 * @tsk: task with the latency
2063 * @cpu: the cpu of the buffer to copy.
2064 *
2065 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2066 */
2067 void
2068 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2069 {
2070 int ret;
2071
2072 if (tr->stop_count)
2073 return;
2074
2075 WARN_ON_ONCE(!irqs_disabled());
2076 if (!tr->allocated_snapshot) {
2077 /* Only the nop tracer should hit this when disabling */
2078 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2079 return;
2080 }
2081
2082 arch_spin_lock(&tr->max_lock);
2083
2084 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2085
2086 if (ret == -EBUSY) {
2087 /*
2088 * We failed to swap the buffer due to a commit taking
2089 * place on this CPU. We fail to record, but we reset
2090 * the max trace buffer (no one writes directly to it)
2091 * and flag that it failed.
2092 * Another possible reason is that a buffer resize is in progress.
2093 */
2094 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2095 "Failed to swap buffers due to commit or resize in progress\n");
2096 }
2097
2098 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2099
2100 __update_max_tr(tr, tsk, cpu);
2101 arch_spin_unlock(&tr->max_lock);
2102 }
2103
2104 #endif /* CONFIG_TRACER_MAX_TRACE */
2105
2106 struct pipe_wait {
2107 struct trace_iterator *iter;
2108 int wait_index;
2109 };
2110
2111 static bool wait_pipe_cond(void *data)
2112 {
2113 struct pipe_wait *pwait = data;
2114 struct trace_iterator *iter = pwait->iter;
2115
2116 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2117 return true;
2118
2119 return iter->closed;
2120 }
2121
2122 static int wait_on_pipe(struct trace_iterator *iter, int full)
2123 {
2124 struct pipe_wait pwait;
2125 int ret;
2126
2127 /* Iterators are static, they should be filled or empty */
2128 if (trace_buffer_iter(iter, iter->cpu_file))
2129 return 0;
2130
2131 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2132 pwait.iter = iter;
2133
2134 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2135 wait_pipe_cond, &pwait);
2136
2137 #ifdef CONFIG_TRACER_MAX_TRACE
2138 /*
2139 * Make sure this is still the snapshot buffer, as if a snapshot were
2140 * to happen, this would now be the main buffer.
2141 */
2142 if (iter->snapshot)
2143 iter->array_buffer = &iter->tr->max_buffer;
2144 #endif
2145 return ret;
2146 }
2147
2148 #ifdef CONFIG_FTRACE_STARTUP_TEST
2149 static bool selftests_can_run;
2150
2151 struct trace_selftests {
2152 struct list_head list;
2153 struct tracer *type;
2154 };
2155
2156 static LIST_HEAD(postponed_selftests);
2157
2158 static int save_selftest(struct tracer *type)
2159 {
2160 struct trace_selftests *selftest;
2161
2162 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2163 if (!selftest)
2164 return -ENOMEM;
2165
2166 selftest->type = type;
2167 list_add(&selftest->list, &postponed_selftests);
2168 return 0;
2169 }
2170
2171 static int run_tracer_selftest(struct tracer *type)
2172 {
2173 struct trace_array *tr = &global_trace;
2174 struct tracer_flags *saved_flags = tr->current_trace_flags;
2175 struct tracer *saved_tracer = tr->current_trace;
2176 int ret;
2177
2178 if (!type->selftest || tracing_selftest_disabled)
2179 return 0;
2180
2181 /*
2182 * If a tracer registers early in boot up (before scheduling is
2183 * initialized and such), then do not run its selftests yet.
2184 * Instead, run it a little later in the boot process.
2185 */
2186 if (!selftests_can_run)
2187 return save_selftest(type);
2188
2189 if (!tracing_is_on()) {
2190 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2191 type->name);
2192 return 0;
2193 }
2194
2195 /*
2196 * Run a selftest on this tracer.
2197 * Here we reset the trace buffer, and set the current
2198 * tracer to be this tracer. The tracer can then run some
2199 * internal tracing to verify that everything is in order.
2200 * If we fail, we do not register this tracer.
2201 */
2202 tracing_reset_online_cpus(&tr->array_buffer);
2203
2204 tr->current_trace = type;
2205 tr->current_trace_flags = type->flags ? : type->default_flags;
2206
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 if (type->use_max_tr) {
2209 /* If we expanded the buffers, make sure the max is expanded too */
2210 if (tr->ring_buffer_expanded)
2211 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 RING_BUFFER_ALL_CPUS);
2213 tr->allocated_snapshot = true;
2214 }
2215 #endif
2216
2217 /* the test is responsible for initializing and enabling */
2218 pr_info("Testing tracer %s: ", type->name);
2219 ret = type->selftest(type, tr);
2220 /* the test is responsible for resetting too */
2221 tr->current_trace = saved_tracer;
2222 tr->current_trace_flags = saved_flags;
2223 if (ret) {
2224 printk(KERN_CONT "FAILED!\n");
2225 /* Add the warning after printing 'FAILED' */
2226 WARN_ON(1);
2227 return -1;
2228 }
2229 /* Only reset on passing, to avoid touching corrupted buffers */
2230 tracing_reset_online_cpus(&tr->array_buffer);
2231
2232 #ifdef CONFIG_TRACER_MAX_TRACE
2233 if (type->use_max_tr) {
2234 tr->allocated_snapshot = false;
2235
2236 /* Shrink the max buffer again */
2237 if (tr->ring_buffer_expanded)
2238 ring_buffer_resize(tr->max_buffer.buffer, 1,
2239 RING_BUFFER_ALL_CPUS);
2240 }
2241 #endif
2242
2243 printk(KERN_CONT "PASSED\n");
2244 return 0;
2245 }
2246
2247 static int do_run_tracer_selftest(struct tracer *type)
2248 {
2249 int ret;
2250
2251 /*
2252 * Tests can take a long time, especially if they are run one after the
2253 * other, as does happen during bootup when all the tracers are
2254 * registered. This could cause the soft lockup watchdog to trigger.
2255 */
2256 cond_resched();
2257
2258 tracing_selftest_running = true;
2259 ret = run_tracer_selftest(type);
2260 tracing_selftest_running = false;
2261
2262 return ret;
2263 }
2264
2265 static __init int init_trace_selftests(void)
2266 {
2267 struct trace_selftests *p, *n;
2268 struct tracer *t, **last;
2269 int ret;
2270
2271 selftests_can_run = true;
2272
2273 guard(mutex)(&trace_types_lock);
2274
2275 if (list_empty(&postponed_selftests))
2276 return 0;
2277
2278 pr_info("Running postponed tracer tests:\n");
2279
2280 tracing_selftest_running = true;
2281 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2282 /* This loop can take minutes when sanitizers are enabled, so
2283 * let's make sure we allow RCU processing.
2284 */
2285 cond_resched();
2286 ret = run_tracer_selftest(p->type);
2287 /* If the test fails, then warn and remove from available_tracers */
2288 if (ret < 0) {
2289 WARN(1, "tracer: %s failed selftest, disabling\n",
2290 p->type->name);
2291 last = &trace_types;
2292 for (t = trace_types; t; t = t->next) {
2293 if (t == p->type) {
2294 *last = t->next;
2295 break;
2296 }
2297 last = &t->next;
2298 }
2299 }
2300 list_del(&p->list);
2301 kfree(p);
2302 }
2303 tracing_selftest_running = false;
2304
2305 return 0;
2306 }
2307 core_initcall(init_trace_selftests);
2308 #else
2309 static inline int do_run_tracer_selftest(struct tracer *type)
2310 {
2311 return 0;
2312 }
2313 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2314
2315 static int add_tracer(struct trace_array *tr, struct tracer *t);
2316
2317 static void __init apply_trace_boot_options(void);
2318
2319 static void free_tracers(struct trace_array *tr)
2320 {
2321 struct tracers *t, *n;
2322
2323 lockdep_assert_held(&trace_types_lock);
2324
2325 list_for_each_entry_safe(t, n, &tr->tracers, list) {
2326 list_del(&t->list);
2327 kfree(t->flags);
2328 kfree(t);
2329 }
2330 }
2331
2332 /**
2333 * register_tracer - register a tracer with the ftrace system.
2334 * @type: the plugin for the tracer
2335 *
2336 * Register a new plugin tracer.
2337 */
2338 int __init register_tracer(struct tracer *type)
2339 {
2340 struct trace_array *tr;
2341 struct tracer *t;
2342 int ret = 0;
2343
2344 if (!type->name) {
2345 pr_info("Tracer must have a name\n");
2346 return -1;
2347 }
2348
2349 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2350 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2351 return -1;
2352 }
2353
2354 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2355 pr_warn("Can not register tracer %s due to lockdown\n",
2356 type->name);
2357 return -EPERM;
2358 }
2359
2360 mutex_lock(&trace_types_lock);
2361
2362 for (t = trace_types; t; t = t->next) {
2363 if (strcmp(type->name, t->name) == 0) {
2364 /* already found */
2365 pr_info("Tracer %s already registered\n",
2366 type->name);
2367 ret = -1;
2368 goto out;
2369 }
2370 }
2371
2372 /* store the tracer for __set_tracer_option */
2373 if (type->flags)
2374 type->flags->trace = type;
2375
2376 ret = do_run_tracer_selftest(type);
2377 if (ret < 0)
2378 goto out;
2379
2380 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2381 ret = add_tracer(tr, type);
2382 if (ret < 0) {
2383 /* The tracer will still exist but without options */
2384 pr_warn("Failed to create tracer options for %s\n", type->name);
2385 break;
2386 }
2387 }
2388
2389 type->next = trace_types;
2390 trace_types = type;
2391
2392 out:
2393 mutex_unlock(&trace_types_lock);
2394
2395 if (ret || !default_bootup_tracer)
2396 return ret;
2397
2398 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2399 return 0;
2400
2401 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2402 /* Do we want this tracer to start on bootup? */
2403 WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
2404 default_bootup_tracer = NULL;
2405
2406 apply_trace_boot_options();
2407
2408 /* disable other selftests, since this will break it. */
2409 disable_tracing_selftest("running a tracer");
2410
2411 return 0;
2412 }
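/*
 * Example (sketch): the smallest tracer plugin that could be passed to
 * register_tracer() above. "noop_like_tracer" and its callbacks are
 * hypothetical names; the struct tracer fields used (.name, .init, .reset)
 * are the real ones this function expects, and register_tracer() is __init
 * so it must be called from boot-time init code.
 */
#if 0	/* usage sketch only */
static int noop_like_init(struct trace_array *tr)
{
	/* enable whatever hooks or events this tracer needs */
	return 0;
}

static void noop_like_reset(struct trace_array *tr)
{
	/* undo everything done in ->init() */
}

static struct tracer noop_like_tracer __read_mostly = {
	.name	= "noop_like",
	.init	= noop_like_init,
	.reset	= noop_like_reset,
};

static __init int noop_like_register(void)
{
	return register_tracer(&noop_like_tracer);
}
core_initcall(noop_like_register);
#endif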
2413
2414 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2415 {
2416 struct trace_buffer *buffer = buf->buffer;
2417
2418 if (!buffer)
2419 return;
2420
2421 ring_buffer_record_disable(buffer);
2422
2423 /* Make sure all commits have finished */
2424 synchronize_rcu();
2425 ring_buffer_reset_cpu(buffer, cpu);
2426
2427 ring_buffer_record_enable(buffer);
2428 }
2429
2430 void tracing_reset_online_cpus(struct array_buffer *buf)
2431 {
2432 struct trace_buffer *buffer = buf->buffer;
2433
2434 if (!buffer)
2435 return;
2436
2437 ring_buffer_record_disable(buffer);
2438
2439 /* Make sure all commits have finished */
2440 synchronize_rcu();
2441
2442 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443
2444 ring_buffer_reset_online_cpus(buffer);
2445
2446 ring_buffer_record_enable(buffer);
2447 }
2448
2449 static void tracing_reset_all_cpus(struct array_buffer *buf)
2450 {
2451 struct trace_buffer *buffer = buf->buffer;
2452
2453 if (!buffer)
2454 return;
2455
2456 ring_buffer_record_disable(buffer);
2457
2458 /* Make sure all commits have finished */
2459 synchronize_rcu();
2460
2461 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2462
2463 ring_buffer_reset(buffer);
2464
2465 ring_buffer_record_enable(buffer);
2466 }
2467
2468 /* Must have trace_types_lock held */
2469 void tracing_reset_all_online_cpus_unlocked(void)
2470 {
2471 struct trace_array *tr;
2472
2473 lockdep_assert_held(&trace_types_lock);
2474
2475 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2476 if (!tr->clear_trace)
2477 continue;
2478 tr->clear_trace = false;
2479 tracing_reset_online_cpus(&tr->array_buffer);
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481 tracing_reset_online_cpus(&tr->max_buffer);
2482 #endif
2483 }
2484 }
2485
2486 void tracing_reset_all_online_cpus(void)
2487 {
2488 guard(mutex)(&trace_types_lock);
2489 tracing_reset_all_online_cpus_unlocked();
2490 }
2491
2492 int is_tracing_stopped(void)
2493 {
2494 return global_trace.stop_count;
2495 }
2496
2497 static void tracing_start_tr(struct trace_array *tr)
2498 {
2499 struct trace_buffer *buffer;
2500
2501 if (tracing_disabled)
2502 return;
2503
2504 guard(raw_spinlock_irqsave)(&tr->start_lock);
2505 if (--tr->stop_count) {
2506 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2507 /* Someone screwed up their debugging */
2508 tr->stop_count = 0;
2509 }
2510 return;
2511 }
2512
2513 /* Prevent the buffers from switching */
2514 arch_spin_lock(&tr->max_lock);
2515
2516 buffer = tr->array_buffer.buffer;
2517 if (buffer)
2518 ring_buffer_record_enable(buffer);
2519
2520 #ifdef CONFIG_TRACER_MAX_TRACE
2521 buffer = tr->max_buffer.buffer;
2522 if (buffer)
2523 ring_buffer_record_enable(buffer);
2524 #endif
2525
2526 arch_spin_unlock(&tr->max_lock);
2527 }
2528
2529 /**
2530 * tracing_start - quick start of the tracer
2531 *
2532 * If tracing is enabled but was stopped by tracing_stop,
2533 * this will start the tracer back up.
2534 */
2535 void tracing_start(void)
2536
2537 {
2538 return tracing_start_tr(&global_trace);
2539 }
2540
2541 static void tracing_stop_tr(struct trace_array *tr)
2542 {
2543 struct trace_buffer *buffer;
2544
2545 guard(raw_spinlock_irqsave)(&tr->start_lock);
2546 if (tr->stop_count++)
2547 return;
2548
2549 /* Prevent the buffers from switching */
2550 arch_spin_lock(&tr->max_lock);
2551
2552 buffer = tr->array_buffer.buffer;
2553 if (buffer)
2554 ring_buffer_record_disable(buffer);
2555
2556 #ifdef CONFIG_TRACER_MAX_TRACE
2557 buffer = tr->max_buffer.buffer;
2558 if (buffer)
2559 ring_buffer_record_disable(buffer);
2560 #endif
2561
2562 arch_spin_unlock(&tr->max_lock);
2563 }
2564
2565 /**
2566 * tracing_stop - quick stop of the tracer
2567 *
2568 * Light weight way to stop tracing. Use in conjunction with
2569 * tracing_start.
2570 */
2571 void tracing_stop(void)
2572 {
2573 return tracing_stop_tr(&global_trace);
2574 }
2575
2576 /*
2577 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2578 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2579 * simplifies those functions and keeps them in sync.
2580 */
2581 enum print_line_t trace_handle_return(struct trace_seq *s)
2582 {
2583 return trace_seq_has_overflowed(s) ?
2584 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2585 }
2586 EXPORT_SYMBOL_GPL(trace_handle_return);
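/*
 * Example (sketch): trace_handle_return() is meant to end trace_event
 * output callbacks, turning a trace_seq overflow into
 * TRACE_TYPE_PARTIAL_LINE. The event name and struct my_event_entry below
 * are hypothetical; the callback signature matches trace_event_functions.
 */
#if 0	/* usage sketch only */
static enum print_line_t
my_event_print(struct trace_iterator *iter, int flags,
	       struct trace_event *event)
{
	struct my_event_entry *field = (void *)iter->ent;

	trace_seq_printf(&iter->seq, "my_event: val=%u\n", field->val);
	return trace_handle_return(&iter->seq);
}
#endif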
2587
2588 static unsigned short migration_disable_value(void)
2589 {
2590 #if defined(CONFIG_SMP)
2591 return current->migration_disabled;
2592 #else
2593 return 0;
2594 #endif
2595 }
2596
2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2598 {
2599 unsigned int trace_flags = irqs_status;
2600 unsigned int pc;
2601
2602 pc = preempt_count();
2603
2604 if (pc & NMI_MASK)
2605 trace_flags |= TRACE_FLAG_NMI;
2606 if (pc & HARDIRQ_MASK)
2607 trace_flags |= TRACE_FLAG_HARDIRQ;
2608 if (in_serving_softirq())
2609 trace_flags |= TRACE_FLAG_SOFTIRQ;
2610 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2611 trace_flags |= TRACE_FLAG_BH_OFF;
2612
2613 if (tif_need_resched())
2614 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2615 if (test_preempt_need_resched())
2616 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2617 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2618 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2619 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2620 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2621 }
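/*
 * Example (sketch): how the packed trace_ctx word built above is laid out
 * and can be unpacked. Bits 0-3 hold the (capped) preempt count, bits 4-7
 * the (capped) migration-disable depth, and bits 16 and up the
 * TRACE_FLAG_* bits. The helper below is illustrative only.
 */
#if 0	/* usage sketch only */
static void my_decode_trace_ctx(unsigned int trace_ctx)
{
	unsigned int preempt_cnt = trace_ctx & 0xf;
	unsigned int migrate_dis = (trace_ctx >> 4) & 0xf;
	unsigned int flags	 = trace_ctx >> 16;

	pr_info("preempt=%u migrate_disable=%u nmi=%d hardirq=%d softirq=%d\n",
		preempt_cnt, migrate_dis,
		!!(flags & TRACE_FLAG_NMI),
		!!(flags & TRACE_FLAG_HARDIRQ),
		!!(flags & TRACE_FLAG_SOFTIRQ));
}
#endif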
2622
2623 struct ring_buffer_event *
2624 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2625 int type,
2626 unsigned long len,
2627 unsigned int trace_ctx)
2628 {
2629 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2630 }
2631
2632 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2633 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2634 static int trace_buffered_event_ref;
2635
2636 /**
2637 * trace_buffered_event_enable - enable buffering events
2638 *
2639 * When events are being filtered, it is quicker to use a temporary
2640 * buffer to write the event data into if there's a likely chance
2641 * that it will not be committed. Discarding from the ring buffer
2642 * is not as fast as committing, and is much slower than copying
2643 * into a temporary buffer and then committing.
2644 *
2645 * When an event is to be filtered, per-CPU buffers are allocated
2646 * to write the event data into. If the event is filtered and
2647 * discarded, the data is simply dropped; otherwise, the entire
2648 * event is committed in one shot.
2649 */
2650 void trace_buffered_event_enable(void)
2651 {
2652 struct ring_buffer_event *event;
2653 struct page *page;
2654 int cpu;
2655
2656 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2657
2658 if (trace_buffered_event_ref++)
2659 return;
2660
2661 for_each_tracing_cpu(cpu) {
2662 page = alloc_pages_node(cpu_to_node(cpu),
2663 GFP_KERNEL | __GFP_NORETRY, 0);
2664 /* This is just an optimization and can handle failures */
2665 if (!page) {
2666 pr_err("Failed to allocate event buffer\n");
2667 break;
2668 }
2669
2670 event = page_address(page);
2671 memset(event, 0, sizeof(*event));
2672
2673 per_cpu(trace_buffered_event, cpu) = event;
2674
2675 scoped_guard(preempt,) {
2676 if (cpu == smp_processor_id() &&
2677 __this_cpu_read(trace_buffered_event) !=
2678 per_cpu(trace_buffered_event, cpu))
2679 WARN_ON_ONCE(1);
2680 }
2681 }
2682 }
2683
2684 static void enable_trace_buffered_event(void *data)
2685 {
2686 this_cpu_dec(trace_buffered_event_cnt);
2687 }
2688
2689 static void disable_trace_buffered_event(void *data)
2690 {
2691 this_cpu_inc(trace_buffered_event_cnt);
2692 }
2693
2694 /**
2695 * trace_buffered_event_disable - disable buffering events
2696 *
2697 * When a filter is removed, it is faster to not use the buffered
2698 * events, and to commit directly into the ring buffer. Free up
2699 * the temp buffers when there are no more users. This requires
2700 * special synchronization with current events.
2701 */
2702 void trace_buffered_event_disable(void)
2703 {
2704 int cpu;
2705
2706 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707
2708 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2709 return;
2710
2711 if (--trace_buffered_event_ref)
2712 return;
2713
2714 /* For each CPU, set the buffer as used. */
2715 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2716 NULL, true);
2717
2718 /* Wait for all current users to finish */
2719 synchronize_rcu();
2720
2721 for_each_tracing_cpu(cpu) {
2722 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 per_cpu(trace_buffered_event, cpu) = NULL;
2724 }
2725
2726 /*
2727 * Wait for all CPUs that potentially started checking if they can use
2728 * their event buffer only after the previous synchronize_rcu() call and
2729 * they still read a valid pointer from trace_buffered_event. It must be
2730 * ensured they don't see cleared trace_buffered_event_cnt else they
2731 * could wrongly decide to use the pointed-to buffer which is now freed.
2732 */
2733 synchronize_rcu();
2734
2735 /* For each CPU, relinquish the buffer */
2736 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2737 true);
2738 }
2739
2740 static struct trace_buffer *temp_buffer;
2741
2742 struct ring_buffer_event *
2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2744 struct trace_event_file *trace_file,
2745 int type, unsigned long len,
2746 unsigned int trace_ctx)
2747 {
2748 struct ring_buffer_event *entry;
2749 struct trace_array *tr = trace_file->tr;
2750 int val;
2751
2752 *current_rb = tr->array_buffer.buffer;
2753
2754 if (!tr->no_filter_buffering_ref &&
2755 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2756 preempt_disable_notrace();
2757 /*
2758 * Filtering is on, so try to use the per cpu buffer first.
2759 * This buffer will simulate a ring_buffer_event,
2760 * where the type_len is zero and the array[0] will
2761 * hold the full length.
2762 * (see include/linux/ring_buffer.h for details on
2763 * how the ring_buffer_event is structured).
2764 *
2765 * Using a temp buffer during filtering and copying it
2766 * on a matched filter is quicker than writing directly
2767 * into the ring buffer and then discarding it when
2768 * it doesn't match. That is because the discard
2769 * requires several atomic operations to get right.
2770 * Copying on a match and doing nothing on a failed match
2771 * is still quicker than skipping the copy on a match but
2772 * having to discard from the ring buffer on a failed match.
2773 */
2774 if ((entry = __this_cpu_read(trace_buffered_event))) {
2775 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2776
2777 val = this_cpu_inc_return(trace_buffered_event_cnt);
2778
2779 /*
2780 * Preemption is disabled, but interrupts and NMIs
2781 * can still come in now. If that happens after
2782 * the above increment, then it will have to go
2783 * back to the old method of allocating the event
2784 * on the ring buffer, and if the filter fails, it
2785 * will have to call ring_buffer_discard_commit()
2786 * to remove it.
2787 *
2788 * Need to also check the unlikely case that the
2789 * length is bigger than the temp buffer size.
2790 * If that happens, then the reserve is pretty much
2791 * guaranteed to fail, as the ring buffer currently
2792 * only allows events less than a page. But that may
2793 * change in the future, so let the ring buffer reserve
2794 * handle the failure in that case.
2795 */
2796 if (val == 1 && likely(len <= max_len)) {
2797 trace_event_setup(entry, type, trace_ctx);
2798 entry->array[0] = len;
2799 /* Return with preemption disabled */
2800 return entry;
2801 }
2802 this_cpu_dec(trace_buffered_event_cnt);
2803 }
2804 /* __trace_buffer_lock_reserve() disables preemption */
2805 preempt_enable_notrace();
2806 }
2807
2808 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2809 trace_ctx);
2810 /*
2811 * If tracing is off, but we have triggers enabled
2812 * we still need to look at the event data. Use the temp_buffer
2813 * to store the trace event for the trigger to use. It's recursion
2814 * safe and will not be recorded anywhere.
2815 */
2816 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2817 *current_rb = temp_buffer;
2818 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2819 trace_ctx);
2820 }
2821 return entry;
2822 }
2823 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2824
2825 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2826 static DEFINE_MUTEX(tracepoint_printk_mutex);
2827
2828 static void output_printk(struct trace_event_buffer *fbuffer)
2829 {
2830 struct trace_event_call *event_call;
2831 struct trace_event_file *file;
2832 struct trace_event *event;
2833 unsigned long flags;
2834 struct trace_iterator *iter = tracepoint_print_iter;
2835
2836 /* We should never get here if iter is NULL */
2837 if (WARN_ON_ONCE(!iter))
2838 return;
2839
2840 event_call = fbuffer->trace_file->event_call;
2841 if (!event_call || !event_call->event.funcs ||
2842 !event_call->event.funcs->trace)
2843 return;
2844
2845 file = fbuffer->trace_file;
2846 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2847 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2848 !filter_match_preds(file->filter, fbuffer->entry)))
2849 return;
2850
2851 event = &fbuffer->trace_file->event_call->event;
2852
2853 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2854 trace_seq_init(&iter->seq);
2855 iter->ent = fbuffer->entry;
2856 event_call->event.funcs->trace(iter, 0, event);
2857 trace_seq_putc(&iter->seq, 0);
2858 printk("%s", iter->seq.buffer);
2859
2860 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2861 }
2862
2863 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2864 void *buffer, size_t *lenp,
2865 loff_t *ppos)
2866 {
2867 int save_tracepoint_printk;
2868 int ret;
2869
2870 guard(mutex)(&tracepoint_printk_mutex);
2871 save_tracepoint_printk = tracepoint_printk;
2872
2873 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2874
2875 /*
2876 * This will force exiting early, as tracepoint_printk
2877 * is always zero when tracepoint_printk_iter is not allocated
2878 */
2879 if (!tracepoint_print_iter)
2880 tracepoint_printk = 0;
2881
2882 if (save_tracepoint_printk == tracepoint_printk)
2883 return ret;
2884
2885 if (tracepoint_printk)
2886 static_key_enable(&tracepoint_printk_key.key);
2887 else
2888 static_key_disable(&tracepoint_printk_key.key);
2889
2890 return ret;
2891 }
2892
2893 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2894 {
2895 enum event_trigger_type tt = ETT_NONE;
2896 struct trace_event_file *file = fbuffer->trace_file;
2897
2898 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2899 fbuffer->entry, &tt))
2900 goto discard;
2901
2902 if (static_key_false(&tracepoint_printk_key.key))
2903 output_printk(fbuffer);
2904
2905 if (static_branch_unlikely(&trace_event_exports_enabled))
2906 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2907
2908 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2909 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2910
2911 discard:
2912 if (tt)
2913 event_triggers_post_call(file, tt);
2914
2915 }
2916 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2917
2918 /*
2919 * Skip 3:
2920 *
2921 * trace_buffer_unlock_commit_regs()
2922 * trace_event_buffer_commit()
2923 * trace_event_raw_event_xxx()
2924 */
2925 # define STACK_SKIP 3
2926
2927 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2928 struct trace_buffer *buffer,
2929 struct ring_buffer_event *event,
2930 unsigned int trace_ctx,
2931 struct pt_regs *regs)
2932 {
2933 __buffer_unlock_commit(buffer, event);
2934
2935 /*
2936 * If regs is not set, then skip the necessary functions.
2937 * Note, we can still get here via blktrace, wakeup tracer
2938 * and mmiotrace, but that's ok if they lose a function or
2939 * two. They are not that meaningful.
2940 */
2941 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2942 ftrace_trace_userstack(tr, buffer, trace_ctx);
2943 }
2944
2945 /*
2946 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2947 */
2948 void
2949 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2950 struct ring_buffer_event *event)
2951 {
2952 __buffer_unlock_commit(buffer, event);
2953 }
2954
2955 void
2956 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2957 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2958 {
2959 struct trace_buffer *buffer = tr->array_buffer.buffer;
2960 struct ring_buffer_event *event;
2961 struct ftrace_entry *entry;
2962 int size = sizeof(*entry);
2963
2964 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2965
2966 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2967 trace_ctx);
2968 if (!event)
2969 return;
2970 entry = ring_buffer_event_data(event);
2971 entry->ip = ip;
2972 entry->parent_ip = parent_ip;
2973
2974 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2975 if (fregs) {
2976 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2977 entry->args[i] = ftrace_regs_get_argument(fregs, i);
2978 }
2979 #endif
2980
2981 if (static_branch_unlikely(&trace_function_exports_enabled))
2982 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2983 __buffer_unlock_commit(buffer, event);
2984 }
2985
2986 #ifdef CONFIG_STACKTRACE
2987
2988 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2989 #define FTRACE_KSTACK_NESTING 4
2990
2991 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
2992
2993 struct ftrace_stack {
2994 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2995 };
2996
2997
2998 struct ftrace_stacks {
2999 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3000 };
3001
3002 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3003 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3004
3005 static void __ftrace_trace_stack(struct trace_array *tr,
3006 struct trace_buffer *buffer,
3007 unsigned int trace_ctx,
3008 int skip, struct pt_regs *regs)
3009 {
3010 struct ring_buffer_event *event;
3011 unsigned int size, nr_entries;
3012 struct ftrace_stack *fstack;
3013 struct stack_entry *entry;
3014 int stackidx;
3015 int bit;
3016
3017 bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
3018 if (bit < 0)
3019 return;
3020
3021 /*
3022 * Add one, for this function and the call to stack_trace_save().
3023 * If regs is set, then these functions will not be in the way.
3024 */
3025 #ifndef CONFIG_UNWINDER_ORC
3026 if (!regs)
3027 skip++;
3028 #endif
3029
3030 guard(preempt_notrace)();
3031
3032 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3033
3034 /* This should never happen. If it does, yell once and skip */
3035 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3036 goto out;
3037
3038 /*
3039 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3040 * interrupt will either see the value pre increment or post
3041 * increment. If the interrupt happens pre increment it will have
3042 * restored the counter when it returns. We just need a barrier to
3043 * keep gcc from moving things around.
3044 */
3045 barrier();
3046
3047 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3048 size = ARRAY_SIZE(fstack->calls);
3049
3050 if (regs) {
3051 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3052 size, skip);
3053 } else {
3054 nr_entries = stack_trace_save(fstack->calls, size, skip);
3055 }
3056
3057 #ifdef CONFIG_DYNAMIC_FTRACE
3058 /* Mark entry of stack trace as trampoline code */
3059 if (tr->ops && tr->ops->trampoline) {
3060 unsigned long tramp_start = tr->ops->trampoline;
3061 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3062 unsigned long *calls = fstack->calls;
3063
3064 for (int i = 0; i < nr_entries; i++) {
3065 if (calls[i] >= tramp_start && calls[i] < tramp_end)
3066 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3067 }
3068 }
3069 #endif
3070
3071 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3072 struct_size(entry, caller, nr_entries),
3073 trace_ctx);
3074 if (!event)
3075 goto out;
3076 entry = ring_buffer_event_data(event);
3077
3078 entry->size = nr_entries;
3079 memcpy(&entry->caller, fstack->calls,
3080 flex_array_size(entry, caller, nr_entries));
3081
3082 __buffer_unlock_commit(buffer, event);
3083
3084 out:
3085 /* Again, don't let gcc optimize things here */
3086 barrier();
3087 __this_cpu_dec(ftrace_stack_reserve);
3088 trace_clear_recursion(bit);
3089 }
3090
3091 static inline void ftrace_trace_stack(struct trace_array *tr,
3092 struct trace_buffer *buffer,
3093 unsigned int trace_ctx,
3094 int skip, struct pt_regs *regs)
3095 {
3096 if (!(tr->trace_flags & TRACE_ITER(STACKTRACE)))
3097 return;
3098
3099 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3100 }
3101
3102 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3103 int skip)
3104 {
3105 struct trace_buffer *buffer = tr->array_buffer.buffer;
3106
3107 if (rcu_is_watching()) {
3108 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3109 return;
3110 }
3111
3112 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3113 return;
3114
3115 /*
3116 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3117 * but if the above rcu_is_watching() failed, then the NMI
3118 * triggered someplace critical, and ct_irq_enter() should
3119 * not be called from NMI.
3120 */
3121 if (unlikely(in_nmi()))
3122 return;
3123
3124 ct_irq_enter_irqson();
3125 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3126 ct_irq_exit_irqson();
3127 }
3128
3129 /**
3130 * trace_dump_stack - record a stack back trace in the trace buffer
3131 * @skip: Number of functions to skip (helper handlers)
3132 */
3133 void trace_dump_stack(int skip)
3134 {
3135 if (tracing_disabled || tracing_selftest_running)
3136 return;
3137
3138 #ifndef CONFIG_UNWINDER_ORC
3139 /* Skip 1 to skip this function. */
3140 skip++;
3141 #endif
3142 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3143 tracing_gen_ctx(), skip, NULL);
3144 }
3145 EXPORT_SYMBOL_GPL(trace_dump_stack);
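/*
 * Example (sketch): any kernel code can drop a kernel stack trace into the
 * ring buffer to see how a suspicious path was reached, without stopping
 * tracing. my_suspicious_path() is hypothetical; trace_dump_stack() and
 * trace_printk() are the real APIs.
 */
#if 0	/* usage sketch only */
static void my_suspicious_path(void)
{
	trace_printk("unexpected state, recording backtrace\n");
	trace_dump_stack(0);	/* 0: do not skip any extra frames */
}
#endif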
3146
3147 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3148 static DEFINE_PER_CPU(int, user_stack_count);
3149
3150 static void
3151 ftrace_trace_userstack(struct trace_array *tr,
3152 struct trace_buffer *buffer, unsigned int trace_ctx)
3153 {
3154 struct ring_buffer_event *event;
3155 struct userstack_entry *entry;
3156
3157 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
3158 return;
3159
3160 /*
3161 * NMIs cannot handle page faults, even with fixups.
3162 * Saving the user stack can (and often does) fault.
3163 */
3164 if (unlikely(in_nmi()))
3165 return;
3166
3167 /*
3168 * prevent recursion, since the user stack tracing may
3169 * trigger other kernel events.
3170 */
3171 guard(preempt)();
3172 if (__this_cpu_read(user_stack_count))
3173 return;
3174
3175 __this_cpu_inc(user_stack_count);
3176
3177 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3178 sizeof(*entry), trace_ctx);
3179 if (!event)
3180 goto out_drop_count;
3181 entry = ring_buffer_event_data(event);
3182
3183 entry->tgid = current->tgid;
3184 memset(&entry->caller, 0, sizeof(entry->caller));
3185
3186 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3187 __buffer_unlock_commit(buffer, event);
3188
3189 out_drop_count:
3190 __this_cpu_dec(user_stack_count);
3191 }
3192 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3193 static void ftrace_trace_userstack(struct trace_array *tr,
3194 struct trace_buffer *buffer,
3195 unsigned int trace_ctx)
3196 {
3197 }
3198 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3199
3200 #endif /* CONFIG_STACKTRACE */
3201
3202 static inline void
3203 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3204 unsigned long long delta)
3205 {
3206 entry->bottom_delta_ts = delta & U32_MAX;
3207 entry->top_delta_ts = (delta >> 32);
3208 }
3209
3210 void trace_last_func_repeats(struct trace_array *tr,
3211 struct trace_func_repeats *last_info,
3212 unsigned int trace_ctx)
3213 {
3214 struct trace_buffer *buffer = tr->array_buffer.buffer;
3215 struct func_repeats_entry *entry;
3216 struct ring_buffer_event *event;
3217 u64 delta;
3218
3219 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3220 sizeof(*entry), trace_ctx);
3221 if (!event)
3222 return;
3223
3224 delta = ring_buffer_event_time_stamp(buffer, event) -
3225 last_info->ts_last_call;
3226
3227 entry = ring_buffer_event_data(event);
3228 entry->ip = last_info->ip;
3229 entry->parent_ip = last_info->parent_ip;
3230 entry->count = last_info->count;
3231 func_repeats_set_delta_ts(entry, delta);
3232
3233 __buffer_unlock_commit(buffer, event);
3234 }
3235
3236 /* created for use with alloc_percpu */
3237 struct trace_buffer_struct {
3238 int nesting;
3239 char buffer[4][TRACE_BUF_SIZE];
3240 };
3241
3242 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3243
3244 /*
3245 * This allows for lockless recording. If we're nested too deeply, then
3246 * this returns NULL.
3247 */
3248 static char *get_trace_buf(void)
3249 {
3250 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3251
3252 if (!trace_percpu_buffer || buffer->nesting >= 4)
3253 return NULL;
3254
3255 buffer->nesting++;
3256
3257 /* Interrupts must see nesting incremented before we use the buffer */
3258 barrier();
3259 return &buffer->buffer[buffer->nesting - 1][0];
3260 }
3261
3262 static void put_trace_buf(void)
3263 {
3264 /* Don't let the decrement of nesting leak before this */
3265 barrier();
3266 this_cpu_dec(trace_percpu_buffer->nesting);
3267 }
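/*
 * Example (sketch): the pattern callers in this file (trace_vbprintk(),
 * __trace_array_vprintk()) follow. get_trace_buf() may return NULL when
 * nested more than four levels deep, and every successful get must be
 * paired with put_trace_buf() on the same CPU with preemption disabled
 * around the pair. my_emit_message() is hypothetical.
 */
#if 0	/* usage sketch only */
static int my_emit_message(struct trace_buffer *buffer, unsigned long ip,
			   const char *fmt, va_list args)
{
	char *tbuffer;
	int len;

	guard(preempt_notrace)();

	tbuffer = get_trace_buf();
	if (!tbuffer)
		return 0;	/* nested more than four levels deep: drop it */

	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
	/* ... reserve a ring buffer event and copy tbuffer into it ... */

	put_trace_buf();
	return len;
}
#endif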
3268
3269 static int alloc_percpu_trace_buffer(void)
3270 {
3271 struct trace_buffer_struct __percpu *buffers;
3272
3273 if (trace_percpu_buffer)
3274 return 0;
3275
3276 buffers = alloc_percpu(struct trace_buffer_struct);
3277 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3278 return -ENOMEM;
3279
3280 trace_percpu_buffer = buffers;
3281 return 0;
3282 }
3283
3284 static int buffers_allocated;
3285
3286 void trace_printk_init_buffers(void)
3287 {
3288 if (buffers_allocated)
3289 return;
3290
3291 if (alloc_percpu_trace_buffer())
3292 return;
3293
3294 /* trace_printk() is for debug use only. Don't use it in production. */
3295
3296 pr_warn("\n");
3297 pr_warn("**********************************************************\n");
3298 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3299 pr_warn("** **\n");
3300 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3301 pr_warn("** **\n");
3302 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3303 pr_warn("** unsafe for production use. **\n");
3304 pr_warn("** **\n");
3305 pr_warn("** If you see this message and you are not debugging **\n");
3306 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3307 pr_warn("** **\n");
3308 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3309 pr_warn("**********************************************************\n");
3310
3311 /* Expand the buffers to set size */
3312 tracing_update_buffers(&global_trace);
3313
3314 buffers_allocated = 1;
3315
3316 /*
3317 * trace_printk_init_buffers() can be called by modules.
3318 * If that happens, then we need to start cmdline recording
3319 * directly here. If the global_trace.buffer is already
3320 * allocated here, then this was called by module code.
3321 */
3322 if (global_trace.array_buffer.buffer)
3323 tracing_start_cmdline_record();
3324 }
3325 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3326
3327 void trace_printk_start_comm(void)
3328 {
3329 /* Start tracing comms if trace printk is set */
3330 if (!buffers_allocated)
3331 return;
3332 tracing_start_cmdline_record();
3333 }
3334
3335 static void trace_printk_start_stop_comm(int enabled)
3336 {
3337 if (!buffers_allocated)
3338 return;
3339
3340 if (enabled)
3341 tracing_start_cmdline_record();
3342 else
3343 tracing_stop_cmdline_record();
3344 }
3345
3346 /**
3347 * trace_vbprintk - write binary msg to tracing buffer
3348 * @ip: The address of the caller
3349 * @fmt: The string format to write to the buffer
3350 * @args: Arguments for @fmt
3351 */
3352 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3353 {
3354 struct ring_buffer_event *event;
3355 struct trace_buffer *buffer;
3356 struct trace_array *tr = READ_ONCE(printk_trace);
3357 struct bprint_entry *entry;
3358 unsigned int trace_ctx;
3359 char *tbuffer;
3360 int len = 0, size;
3361
3362 if (!printk_binsafe(tr))
3363 return trace_vprintk(ip, fmt, args);
3364
3365 if (unlikely(tracing_selftest_running || tracing_disabled))
3366 return 0;
3367
3368 /* Don't pollute graph traces with trace_vprintk internals */
3369 pause_graph_tracing();
3370
3371 trace_ctx = tracing_gen_ctx();
3372 guard(preempt_notrace)();
3373
3374 tbuffer = get_trace_buf();
3375 if (!tbuffer) {
3376 len = 0;
3377 goto out_nobuffer;
3378 }
3379
3380 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3381
3382 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3383 goto out_put;
3384
3385 size = sizeof(*entry) + sizeof(u32) * len;
3386 buffer = tr->array_buffer.buffer;
3387 scoped_guard(ring_buffer_nest, buffer) {
3388 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3389 trace_ctx);
3390 if (!event)
3391 goto out_put;
3392 entry = ring_buffer_event_data(event);
3393 entry->ip = ip;
3394 entry->fmt = fmt;
3395
3396 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3397 __buffer_unlock_commit(buffer, event);
3398 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3399 }
3400 out_put:
3401 put_trace_buf();
3402
3403 out_nobuffer:
3404 unpause_graph_tracing();
3405
3406 return len;
3407 }
3408 EXPORT_SYMBOL_GPL(trace_vbprintk);
3409
3410 static __printf(3, 0)
3411 int __trace_array_vprintk(struct trace_buffer *buffer,
3412 unsigned long ip, const char *fmt, va_list args)
3413 {
3414 struct ring_buffer_event *event;
3415 int len = 0, size;
3416 struct print_entry *entry;
3417 unsigned int trace_ctx;
3418 char *tbuffer;
3419
3420 if (tracing_disabled)
3421 return 0;
3422
3423 /* Don't pollute graph traces with trace_vprintk internals */
3424 pause_graph_tracing();
3425
3426 trace_ctx = tracing_gen_ctx();
3427 guard(preempt_notrace)();
3428
3429
3430 tbuffer = get_trace_buf();
3431 if (!tbuffer) {
3432 len = 0;
3433 goto out_nobuffer;
3434 }
3435
3436 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3437
3438 size = sizeof(*entry) + len + 1;
3439 scoped_guard(ring_buffer_nest, buffer) {
3440 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3441 trace_ctx);
3442 if (!event)
3443 goto out;
3444 entry = ring_buffer_event_data(event);
3445 entry->ip = ip;
3446
3447 memcpy(&entry->buf, tbuffer, len + 1);
3448 __buffer_unlock_commit(buffer, event);
3449 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3450 }
3451 out:
3452 put_trace_buf();
3453
3454 out_nobuffer:
3455 unpause_graph_tracing();
3456
3457 return len;
3458 }
3459
3460 int trace_array_vprintk(struct trace_array *tr,
3461 unsigned long ip, const char *fmt, va_list args)
3462 {
3463 if (tracing_selftest_running && tr == &global_trace)
3464 return 0;
3465
3466 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3467 }
3468
3469 /**
3470 * trace_array_printk - Print a message to a specific instance
3471 * @tr: The instance trace_array descriptor
3472 * @ip: The instruction pointer that this is called from.
3473 * @fmt: The format to print (printf format)
3474 *
3475 * If a subsystem sets up its own instance, they have the right to
3476 * printk strings into their tracing instance buffer using this
3477 * function. Note, this function will not write into the top level
3478 * buffer (use trace_printk() for that), as writing into the top level
3479 * buffer should only have events that can be individually disabled.
3480 * trace_printk() is only used for debugging a kernel, and should never
3481 * be incorporated in normal use.
3482 *
3483 * trace_array_printk() can be used, as it will not add noise to the
3484 * top level tracing buffer.
3485 *
3486 * Note, trace_array_init_printk() must be called on @tr before this
3487 * can be used.
3488 */
3489 int trace_array_printk(struct trace_array *tr,
3490 unsigned long ip, const char *fmt, ...)
3491 {
3492 int ret;
3493 va_list ap;
3494
3495 if (!tr)
3496 return -ENOENT;
3497
3498 /* This is only allowed for created instances */
3499 if (tr == &global_trace)
3500 return 0;
3501
3502 if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
3503 return 0;
3504
3505 va_start(ap, fmt);
3506 ret = trace_array_vprintk(tr, ip, fmt, ap);
3507 va_end(ap);
3508 return ret;
3509 }
3510 EXPORT_SYMBOL_GPL(trace_array_printk);
3511
3512 /**
3513 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3514 * @tr: The trace array to initialize the buffers for
3515 *
3516 * As trace_array_printk() only writes into instances, they are OK to
3517 * have in the kernel (unlike trace_printk()). This needs to be called
3518 * before trace_array_printk() can be used on a trace_array.
3519 */
3520 int trace_array_init_printk(struct trace_array *tr)
3521 {
3522 if (!tr)
3523 return -ENOENT;
3524
3525 /* This is only allowed for created instances */
3526 if (tr == &global_trace)
3527 return -EINVAL;
3528
3529 return alloc_percpu_trace_buffer();
3530 }
3531 EXPORT_SYMBOL_GPL(trace_array_init_printk);
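/*
 * Example (sketch): a subsystem writing into its own instance instead of
 * the top-level buffer. Note that the trace_array_get_by_name() signature
 * differs across kernel versions (newer kernels take a second "systems"
 * argument); the two-argument form is assumed here, and the "my_subsys"
 * names are hypothetical. The reference should eventually be dropped with
 * trace_array_put().
 */
#if 0	/* usage sketch only */
static struct trace_array *my_tr;

static int my_subsys_trace_setup(void)
{
	my_tr = trace_array_get_by_name("my_subsys", NULL);
	if (!my_tr)
		return -ENOMEM;

	return trace_array_init_printk(my_tr);
}

static void my_subsys_log(int err)
{
	trace_array_printk(my_tr, _THIS_IP_, "operation failed: %d\n", err);
}
#endif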
3532
3533 int trace_array_printk_buf(struct trace_buffer *buffer,
3534 unsigned long ip, const char *fmt, ...)
3535 {
3536 int ret;
3537 va_list ap;
3538
3539 if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK)))
3540 return 0;
3541
3542 va_start(ap, fmt);
3543 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3544 va_end(ap);
3545 return ret;
3546 }
3547
3548 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3549 {
3550 return trace_array_vprintk(printk_trace, ip, fmt, args);
3551 }
3552 EXPORT_SYMBOL_GPL(trace_vprintk);
3553
3554 static void trace_iterator_increment(struct trace_iterator *iter)
3555 {
3556 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3557
3558 iter->idx++;
3559 if (buf_iter)
3560 ring_buffer_iter_advance(buf_iter);
3561 }
3562
3563 static struct trace_entry *
3564 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3565 unsigned long *lost_events)
3566 {
3567 struct ring_buffer_event *event;
3568 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3569
3570 if (buf_iter) {
3571 event = ring_buffer_iter_peek(buf_iter, ts);
3572 if (lost_events)
3573 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3574 (unsigned long)-1 : 0;
3575 } else {
3576 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3577 lost_events);
3578 }
3579
3580 if (event) {
3581 iter->ent_size = ring_buffer_event_length(event);
3582 return ring_buffer_event_data(event);
3583 }
3584 iter->ent_size = 0;
3585 return NULL;
3586 }
3587
3588 static struct trace_entry *
3589 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3590 unsigned long *missing_events, u64 *ent_ts)
3591 {
3592 struct trace_buffer *buffer = iter->array_buffer->buffer;
3593 struct trace_entry *ent, *next = NULL;
3594 unsigned long lost_events = 0, next_lost = 0;
3595 int cpu_file = iter->cpu_file;
3596 u64 next_ts = 0, ts;
3597 int next_cpu = -1;
3598 int next_size = 0;
3599 int cpu;
3600
3601 /*
3602 * If we are in a per_cpu trace file, don't bother iterating over
3603 * all CPUs; peek at that CPU directly.
3604 */
3605 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3606 if (ring_buffer_empty_cpu(buffer, cpu_file))
3607 return NULL;
3608 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3609 if (ent_cpu)
3610 *ent_cpu = cpu_file;
3611
3612 return ent;
3613 }
3614
3615 for_each_tracing_cpu(cpu) {
3616
3617 if (ring_buffer_empty_cpu(buffer, cpu))
3618 continue;
3619
3620 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3621
3622 /*
3623 * Pick the entry with the smallest timestamp:
3624 */
3625 if (ent && (!next || ts < next_ts)) {
3626 next = ent;
3627 next_cpu = cpu;
3628 next_ts = ts;
3629 next_lost = lost_events;
3630 next_size = iter->ent_size;
3631 }
3632 }
3633
3634 iter->ent_size = next_size;
3635
3636 if (ent_cpu)
3637 *ent_cpu = next_cpu;
3638
3639 if (ent_ts)
3640 *ent_ts = next_ts;
3641
3642 if (missing_events)
3643 *missing_events = next_lost;
3644
3645 return next;
3646 }
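
/*
 * Worked example of the merge above (illustrative timestamps): if CPU0's
 * next entry has ts=2000, CPU1's has ts=1500 and CPU2's buffer is empty,
 * __find_next_entry() returns CPU1's entry with *ent_cpu == 1,
 * *ent_ts == 1500 and iter->ent_size set to that entry's size. Repeated
 * calls therefore produce a single stream ordered by timestamp across
 * all per-CPU buffers.
 */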
3647
3648 #define STATIC_FMT_BUF_SIZE 128
3649 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3650
3651 char *trace_iter_expand_format(struct trace_iterator *iter)
3652 {
3653 char *tmp;
3654
3655 /*
3656 * iter->tr is NULL when used with tp_printk, in which case this
3657 * can get called where it is not safe to call krealloc().
3658 */
3659 if (!iter->tr || iter->fmt == static_fmt_buf)
3660 return NULL;
3661
3662 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3663 GFP_KERNEL);
3664 if (tmp) {
3665 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3666 iter->fmt = tmp;
3667 }
3668
3669 return tmp;
3670 }
3671
3672 /* Returns true if the string is safe to dereference from an event */
3673 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3674 {
3675 unsigned long addr = (unsigned long)str;
3676 struct trace_event *trace_event;
3677 struct trace_event_call *event;
3678
3679 /* OK if part of the event data */
3680 if ((addr >= (unsigned long)iter->ent) &&
3681 (addr < (unsigned long)iter->ent + iter->ent_size))
3682 return true;
3683
3684 /* OK if part of the temp seq buffer */
3685 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3686 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3687 return true;
3688
3689 /* Core rodata cannot be freed */
3690 if (is_kernel_rodata(addr))
3691 return true;
3692
3693 if (trace_is_tracepoint_string(str))
3694 return true;
3695
3696 /*
3697 * Now this could be a module event, referencing core module
3698 * data, which is OK.
3699 */
3700 if (!iter->ent)
3701 return false;
3702
3703 trace_event = ftrace_find_event(iter->ent->type);
3704 if (!trace_event)
3705 return false;
3706
3707 event = container_of(trace_event, struct trace_event_call, event);
3708 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3709 return false;
3710
3711 /* Would rather have rodata, but this will suffice */
3712 if (within_module_core(addr, event->module))
3713 return true;
3714
3715 return false;
3716 }
3717
3718 /**
3719 * ignore_event - Check dereferenced fields while writing to the seq buffer
3720 * @iter: The iterator that holds the seq buffer and the event being printed
3721 *
3722 * At boot up, test_event_printk() will flag any event that dereferences
3723 * a string with "%s" that does not exist in the ring buffer. It may still
3724 * be valid, as the string may point to a static string in the kernel
3725 * rodata that never gets freed. But if the string pointer is pointing
3726 * to something that was allocated, there's a chance that it can be freed
3727 * by the time the user reads the trace. This would cause a bad memory
3728 * access by the kernel and possibly crash the system.
3729 *
3730 * This function will check if the event has any fields flagged as needing
3731 * to be checked at runtime and perform those checks.
3732 *
3733 * If it is found that a field is unsafe, it will write into the @iter->seq
3734 * a message stating what was found to be unsafe.
3735 *
3736 * Return: true if the event is unsafe and should be ignored,
3737 * false otherwise.
3738 */
3739 bool ignore_event(struct trace_iterator *iter)
3740 {
3741 struct ftrace_event_field *field;
3742 struct trace_event *trace_event;
3743 struct trace_event_call *event;
3744 struct list_head *head;
3745 struct trace_seq *seq;
3746 const void *ptr;
3747
3748 trace_event = ftrace_find_event(iter->ent->type);
3749
3750 seq = &iter->seq;
3751
3752 if (!trace_event) {
3753 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3754 return true;
3755 }
3756
3757 event = container_of(trace_event, struct trace_event_call, event);
3758 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3759 return false;
3760
3761 head = trace_get_fields(event);
3762 if (!head) {
3763 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3764 trace_event_name(event));
3765 return true;
3766 }
3767
3768 /* Offsets are from the iter->ent that points to the raw event */
3769 ptr = iter->ent;
3770
3771 list_for_each_entry(field, head, link) {
3772 const char *str;
3773 bool good;
3774
3775 if (!field->needs_test)
3776 continue;
3777
3778 str = *(const char **)(ptr + field->offset);
3779
3780 good = trace_safe_str(iter, str);
3781
3782 /*
3783 * If you hit this warning, it is likely that the
3784 * trace event in question used %s on a string that
3785 * was saved at the time of the event, but may not be
3786 * around when the trace is read. Use __string(),
3787 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3788 * instead. See samples/trace_events/trace-events-sample.h
3789 * for reference.
3790 */
3791 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3792 trace_event_name(event), field->name)) {
3793 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3794 trace_event_name(event), field->name);
3795 return true;
3796 }
3797 }
3798 return false;
3799 }
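
/*
 * Illustrative sketch of the pattern the warning above refers to, using a
 * hypothetical event; see samples/trace_events/trace-events-sample.h for
 * the full reference (the exact __assign_str() signature differs across
 * kernel versions):
 *
 *	Unsafe: only the pointer is recorded, and the string it points to
 *	may already be freed when the trace is read:
 *
 *		TP_STRUCT__entry(
 *			__field(const char *, name)
 *		),
 *		TP_printk("name=%s", __entry->name)
 *
 *	Safe: the string is copied into the ring buffer at event time:
 *
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 */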
3800
3801 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3802 {
3803 const char *p, *new_fmt;
3804 char *q;
3805
3806 if (WARN_ON_ONCE(!fmt))
3807 return fmt;
3808
3809 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
3810 return fmt;
3811
3812 p = fmt;
3813 new_fmt = q = iter->fmt;
3814 while (*p) {
3815 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3816 if (!trace_iter_expand_format(iter))
3817 return fmt;
3818
3819 q += iter->fmt - new_fmt;
3820 new_fmt = iter->fmt;
3821 }
3822
3823 *q++ = *p++;
3824
3825 /* Replace %p with %px */
3826 if (p[-1] == '%') {
3827 if (p[0] == '%') {
3828 *q++ = *p++;
3829 } else if (p[0] == 'p' && !isalnum(p[1])) {
3830 *q++ = *p++;
3831 *q++ = 'x';
3832 }
3833 }
3834 }
3835 *q = '\0';
3836
3837 return new_fmt;
3838 }
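
/*
 * Example of the rewrite above (illustrative): with the "hash-ptr" option
 * cleared, a format string such as "comm=%s ptr=%p" is copied into
 * iter->fmt as "comm=%s ptr=%px" so the raw address is printed, while
 * extended specifiers like "%pS" (where '%p' is followed by an
 * alphanumeric) and literal "%%" sequences are left untouched.
 */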
3839
3840 #define STATIC_TEMP_BUF_SIZE 128
3841 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3842
3843 /* Find the next real entry, without updating the iterator itself */
3844 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3845 int *ent_cpu, u64 *ent_ts)
3846 {
3847 /* __find_next_entry will reset ent_size */
3848 int ent_size = iter->ent_size;
3849 struct trace_entry *entry;
3850
3851 /*
3852 * If called from ftrace_dump(), then the iter->temp buffer
3853 * will be the static_temp_buf and not created from kmalloc.
3854 * If the entry size is greater than the buffer, we cannot
3855 * save it. Just return NULL in that case. This is only
3856 * used to add markers when two consecutive events'
3857 * timestamps have a large delta. See trace_print_lat_context().
3858 */
3859 if (iter->temp == static_temp_buf &&
3860 STATIC_TEMP_BUF_SIZE < ent_size)
3861 return NULL;
3862
3863 /*
3864 * The __find_next_entry() may call peek_next_entry(), which may
3865 * call ring_buffer_peek() that may make the contents of iter->ent
3866 * undefined. Need to copy iter->ent now.
3867 */
3868 if (iter->ent && iter->ent != iter->temp) {
3869 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3870 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3871 void *temp;
3872 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3873 if (!temp)
3874 return NULL;
3875 kfree(iter->temp);
3876 iter->temp = temp;
3877 iter->temp_size = iter->ent_size;
3878 }
3879 memcpy(iter->temp, iter->ent, iter->ent_size);
3880 iter->ent = iter->temp;
3881 }
3882 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3883 /* Put back the original ent_size */
3884 iter->ent_size = ent_size;
3885
3886 return entry;
3887 }
3888
3889 /* Find the next real entry, and increment the iterator to the next entry */
3890 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3891 {
3892 iter->ent = __find_next_entry(iter, &iter->cpu,
3893 &iter->lost_events, &iter->ts);
3894
3895 if (iter->ent)
3896 trace_iterator_increment(iter);
3897
3898 return iter->ent ? iter : NULL;
3899 }
3900
3901 static void trace_consume(struct trace_iterator *iter)
3902 {
3903 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3904 &iter->lost_events);
3905 }
3906
3907 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3908 {
3909 struct trace_iterator *iter = m->private;
3910 int i = (int)*pos;
3911 void *ent;
3912
3913 WARN_ON_ONCE(iter->leftover);
3914
3915 (*pos)++;
3916
3917 /* can't go backwards */
3918 if (iter->idx > i)
3919 return NULL;
3920
3921 if (iter->idx < 0)
3922 ent = trace_find_next_entry_inc(iter);
3923 else
3924 ent = iter;
3925
3926 while (ent && iter->idx < i)
3927 ent = trace_find_next_entry_inc(iter);
3928
3929 iter->pos = *pos;
3930
3931 return ent;
3932 }
3933
3934 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3935 {
3936 struct ring_buffer_iter *buf_iter;
3937 unsigned long entries = 0;
3938 u64 ts;
3939
3940 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3941
3942 buf_iter = trace_buffer_iter(iter, cpu);
3943 if (!buf_iter)
3944 return;
3945
3946 ring_buffer_iter_reset(buf_iter);
3947
3948 /*
3949 * With the max latency tracers, it is possible that a reset
3950 * never took place on a CPU. This is evident by the timestamp
3951 * being before the start of the buffer.
3952 */
3953 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3954 if (ts >= iter->array_buffer->time_start)
3955 break;
3956 entries++;
3957 ring_buffer_iter_advance(buf_iter);
3958 /* This could be a big loop */
3959 cond_resched();
3960 }
3961
3962 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3963 }
3964
3965 /*
3966 * The current tracer is copied to avoid taking a global lock
3967 * all around.
3968 */
3969 static void *s_start(struct seq_file *m, loff_t *pos)
3970 {
3971 struct trace_iterator *iter = m->private;
3972 struct trace_array *tr = iter->tr;
3973 int cpu_file = iter->cpu_file;
3974 void *p = NULL;
3975 loff_t l = 0;
3976 int cpu;
3977
3978 mutex_lock(&trace_types_lock);
3979 if (unlikely(tr->current_trace != iter->trace)) {
3980 /* Close iter->trace before switching to the new current tracer */
3981 if (iter->trace->close)
3982 iter->trace->close(iter);
3983 iter->trace = tr->current_trace;
3984 /* Reopen the new current tracer */
3985 if (iter->trace->open)
3986 iter->trace->open(iter);
3987 }
3988 mutex_unlock(&trace_types_lock);
3989
3990 #ifdef CONFIG_TRACER_MAX_TRACE
3991 if (iter->snapshot && iter->trace->use_max_tr)
3992 return ERR_PTR(-EBUSY);
3993 #endif
3994
3995 if (*pos != iter->pos) {
3996 iter->ent = NULL;
3997 iter->cpu = 0;
3998 iter->idx = -1;
3999
4000 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4001 for_each_tracing_cpu(cpu)
4002 tracing_iter_reset(iter, cpu);
4003 } else
4004 tracing_iter_reset(iter, cpu_file);
4005
4006 iter->leftover = 0;
4007 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4008 ;
4009
4010 } else {
4011 /*
4012 * If we overflowed the seq_file before, then we want
4013 * to just reuse the trace_seq buffer.
4014 */
4015 if (iter->leftover)
4016 p = iter;
4017 else {
4018 l = *pos - 1;
4019 p = s_next(m, p, &l);
4020 }
4021 }
4022
4023 trace_event_read_lock();
4024 trace_access_lock(cpu_file);
4025 return p;
4026 }
4027
4028 static void s_stop(struct seq_file *m, void *p)
4029 {
4030 struct trace_iterator *iter = m->private;
4031
4032 #ifdef CONFIG_TRACER_MAX_TRACE
4033 if (iter->snapshot && iter->trace->use_max_tr)
4034 return;
4035 #endif
4036
4037 trace_access_unlock(iter->cpu_file);
4038 trace_event_read_unlock();
4039 }
4040
4041 static void
4042 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4043 unsigned long *entries, int cpu)
4044 {
4045 unsigned long count;
4046
4047 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4048 /*
4049 * If this buffer has skipped entries, then we hold all
4050 * entries for the trace and we need to ignore the
4051 * ones before the time stamp.
4052 */
4053 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4054 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4055 /* total is the same as the entries */
4056 *total = count;
4057 } else
4058 *total = count +
4059 ring_buffer_overrun_cpu(buf->buffer, cpu);
4060 *entries = count;
4061 }
4062
4063 static void
4064 get_total_entries(struct array_buffer *buf,
4065 unsigned long *total, unsigned long *entries)
4066 {
4067 unsigned long t, e;
4068 int cpu;
4069
4070 *total = 0;
4071 *entries = 0;
4072
4073 for_each_tracing_cpu(cpu) {
4074 get_total_entries_cpu(buf, &t, &e, cpu);
4075 *total += t;
4076 *entries += e;
4077 }
4078 }
4079
4080 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4081 {
4082 unsigned long total, entries;
4083
4084 if (!tr)
4085 tr = &global_trace;
4086
4087 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4088
4089 return entries;
4090 }
4091
4092 unsigned long trace_total_entries(struct trace_array *tr)
4093 {
4094 unsigned long total, entries;
4095
4096 if (!tr)
4097 tr = &global_trace;
4098
4099 get_total_entries(&tr->array_buffer, &total, &entries);
4100
4101 return entries;
4102 }
4103
4104 static void print_lat_help_header(struct seq_file *m)
4105 {
4106 seq_puts(m, "# _------=> CPU# \n"
4107 "# / _-----=> irqs-off/BH-disabled\n"
4108 "# | / _----=> need-resched \n"
4109 "# || / _---=> hardirq/softirq \n"
4110 "# ||| / _--=> preempt-depth \n"
4111 "# |||| / _-=> migrate-disable \n"
4112 "# ||||| / delay \n"
4113 "# cmd pid |||||| time | caller \n"
4114 "# \\ / |||||| \\ | / \n");
4115 }
4116
4117 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4118 {
4119 unsigned long total;
4120 unsigned long entries;
4121
4122 get_total_entries(buf, &total, &entries);
4123 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4124 entries, total, num_online_cpus());
4125 seq_puts(m, "#\n");
4126 }
4127
4128 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4129 unsigned int flags)
4130 {
4131 bool tgid = flags & TRACE_ITER(RECORD_TGID);
4132
4133 print_event_info(buf, m);
4134
4135 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4136 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4137 }
4138
4139 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4140 unsigned int flags)
4141 {
4142 bool tgid = flags & TRACE_ITER(RECORD_TGID);
4143 static const char space[] = " ";
4144 int prec = tgid ? 12 : 2;
4145
4146 print_event_info(buf, m);
4147
4148 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4149 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4150 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4151 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4152 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4153 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4154 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4155 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4156 }
4157
4158 void
4159 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4160 {
4161 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4162 struct array_buffer *buf = iter->array_buffer;
4163 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4164 struct tracer *type = iter->trace;
4165 unsigned long entries;
4166 unsigned long total;
4167 const char *name = type->name;
4168
4169 get_total_entries(buf, &total, &entries);
4170
4171 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4172 name, init_utsname()->release);
4173 seq_puts(m, "# -----------------------------------"
4174 "---------------------------------\n");
4175 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4176 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4177 nsecs_to_usecs(data->saved_latency),
4178 entries,
4179 total,
4180 buf->cpu,
4181 preempt_model_str(),
4182 /* These are reserved for later use */
4183 0, 0, 0, 0);
4184 #ifdef CONFIG_SMP
4185 seq_printf(m, " #P:%d)\n", num_online_cpus());
4186 #else
4187 seq_puts(m, ")\n");
4188 #endif
4189 seq_puts(m, "# -----------------\n");
4190 seq_printf(m, "# | task: %.16s-%d "
4191 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4192 data->comm, data->pid,
4193 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4194 data->policy, data->rt_priority);
4195 seq_puts(m, "# -----------------\n");
4196
4197 if (data->critical_start) {
4198 seq_puts(m, "# => started at: ");
4199 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4200 trace_print_seq(m, &iter->seq);
4201 seq_puts(m, "\n# => ended at: ");
4202 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4203 trace_print_seq(m, &iter->seq);
4204 seq_puts(m, "\n#\n");
4205 }
4206
4207 seq_puts(m, "#\n");
4208 }
4209
4210 static void test_cpu_buff_start(struct trace_iterator *iter)
4211 {
4212 struct trace_seq *s = &iter->seq;
4213 struct trace_array *tr = iter->tr;
4214
4215 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
4216 return;
4217
4218 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4219 return;
4220
4221 if (cpumask_available(iter->started) &&
4222 cpumask_test_cpu(iter->cpu, iter->started))
4223 return;
4224
4225 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4226 return;
4227
4228 if (cpumask_available(iter->started))
4229 cpumask_set_cpu(iter->cpu, iter->started);
4230
4231 /* Don't print started cpu buffer for the first entry of the trace */
4232 if (iter->idx > 1)
4233 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4234 iter->cpu);
4235 }
4236
4237 #ifdef CONFIG_FTRACE_SYSCALLS
4238 static bool is_syscall_event(struct trace_event *event)
4239 {
4240 return (event->funcs == &enter_syscall_print_funcs) ||
4241 (event->funcs == &exit_syscall_print_funcs);
4242
4243 }
4244 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
4245 #else
4246 static inline bool is_syscall_event(struct trace_event *event)
4247 {
4248 return false;
4249 }
4250 #define syscall_buf_size 0
4251 #endif /* CONFIG_FTRACE_SYSCALLS */
4252
4253 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4254 {
4255 struct trace_array *tr = iter->tr;
4256 struct trace_seq *s = &iter->seq;
4257 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4258 struct trace_entry *entry;
4259 struct trace_event *event;
4260
4261 entry = iter->ent;
4262
4263 test_cpu_buff_start(iter);
4264
4265 event = ftrace_find_event(entry->type);
4266
4267 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4268 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4269 trace_print_lat_context(iter);
4270 else
4271 trace_print_context(iter);
4272 }
4273
4274 if (trace_seq_has_overflowed(s))
4275 return TRACE_TYPE_PARTIAL_LINE;
4276
4277 if (event) {
4278 if (tr->trace_flags & TRACE_ITER(FIELDS))
4279 return print_event_fields(iter, event);
4280 /*
4281 * For TRACE_EVENT() events, the print_fmt is not
4282 * safe to use if the array has delta offsets
4283 * Force printing via the fields.
4284 */
4285 if ((tr->text_delta)) {
4286 /* ftrace and system call events are still OK */
4287 if ((event->type > __TRACE_LAST_TYPE) &&
4288 !is_syscall_event(event))
4289 return print_event_fields(iter, event);
4290 }
4291 return event->funcs->trace(iter, sym_flags, event);
4292 }
4293
4294 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4295
4296 return trace_handle_return(s);
4297 }
4298
4299 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4300 {
4301 struct trace_array *tr = iter->tr;
4302 struct trace_seq *s = &iter->seq;
4303 struct trace_entry *entry;
4304 struct trace_event *event;
4305
4306 entry = iter->ent;
4307
4308 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
4309 trace_seq_printf(s, "%d %d %llu ",
4310 entry->pid, iter->cpu, iter->ts);
4311
4312 if (trace_seq_has_overflowed(s))
4313 return TRACE_TYPE_PARTIAL_LINE;
4314
4315 event = ftrace_find_event(entry->type);
4316 if (event)
4317 return event->funcs->raw(iter, 0, event);
4318
4319 trace_seq_printf(s, "%d ?\n", entry->type);
4320
4321 return trace_handle_return(s);
4322 }
4323
4324 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4325 {
4326 struct trace_array *tr = iter->tr;
4327 struct trace_seq *s = &iter->seq;
4328 unsigned char newline = '\n';
4329 struct trace_entry *entry;
4330 struct trace_event *event;
4331
4332 entry = iter->ent;
4333
4334 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4335 SEQ_PUT_HEX_FIELD(s, entry->pid);
4336 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4337 SEQ_PUT_HEX_FIELD(s, iter->ts);
4338 if (trace_seq_has_overflowed(s))
4339 return TRACE_TYPE_PARTIAL_LINE;
4340 }
4341
4342 event = ftrace_find_event(entry->type);
4343 if (event) {
4344 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4345 if (ret != TRACE_TYPE_HANDLED)
4346 return ret;
4347 }
4348
4349 SEQ_PUT_FIELD(s, newline);
4350
4351 return trace_handle_return(s);
4352 }
4353
4354 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4355 {
4356 struct trace_array *tr = iter->tr;
4357 struct trace_seq *s = &iter->seq;
4358 struct trace_entry *entry;
4359 struct trace_event *event;
4360
4361 entry = iter->ent;
4362
4363 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
4364 SEQ_PUT_FIELD(s, entry->pid);
4365 SEQ_PUT_FIELD(s, iter->cpu);
4366 SEQ_PUT_FIELD(s, iter->ts);
4367 if (trace_seq_has_overflowed(s))
4368 return TRACE_TYPE_PARTIAL_LINE;
4369 }
4370
4371 event = ftrace_find_event(entry->type);
4372 return event ? event->funcs->binary(iter, 0, event) :
4373 TRACE_TYPE_HANDLED;
4374 }
4375
4376 int trace_empty(struct trace_iterator *iter)
4377 {
4378 struct ring_buffer_iter *buf_iter;
4379 int cpu;
4380
4381 /* If we are looking at one CPU buffer, only check that one */
4382 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4383 cpu = iter->cpu_file;
4384 buf_iter = trace_buffer_iter(iter, cpu);
4385 if (buf_iter) {
4386 if (!ring_buffer_iter_empty(buf_iter))
4387 return 0;
4388 } else {
4389 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4390 return 0;
4391 }
4392 return 1;
4393 }
4394
4395 for_each_tracing_cpu(cpu) {
4396 buf_iter = trace_buffer_iter(iter, cpu);
4397 if (buf_iter) {
4398 if (!ring_buffer_iter_empty(buf_iter))
4399 return 0;
4400 } else {
4401 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4402 return 0;
4403 }
4404 }
4405
4406 return 1;
4407 }
4408
4409 /* Called with trace_event_read_lock() held. */
4410 enum print_line_t print_trace_line(struct trace_iterator *iter)
4411 {
4412 struct trace_array *tr = iter->tr;
4413 unsigned long trace_flags = tr->trace_flags;
4414 enum print_line_t ret;
4415
4416 if (iter->lost_events) {
4417 if (iter->lost_events == (unsigned long)-1)
4418 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4419 iter->cpu);
4420 else
4421 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4422 iter->cpu, iter->lost_events);
4423 if (trace_seq_has_overflowed(&iter->seq))
4424 return TRACE_TYPE_PARTIAL_LINE;
4425 }
4426
4427 if (iter->trace && iter->trace->print_line) {
4428 ret = iter->trace->print_line(iter);
4429 if (ret != TRACE_TYPE_UNHANDLED)
4430 return ret;
4431 }
4432
4433 if (iter->ent->type == TRACE_BPUTS &&
4434 trace_flags & TRACE_ITER(PRINTK) &&
4435 trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4436 return trace_print_bputs_msg_only(iter);
4437
4438 if (iter->ent->type == TRACE_BPRINT &&
4439 trace_flags & TRACE_ITER(PRINTK) &&
4440 trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4441 return trace_print_bprintk_msg_only(iter);
4442
4443 if (iter->ent->type == TRACE_PRINT &&
4444 trace_flags & TRACE_ITER(PRINTK) &&
4445 trace_flags & TRACE_ITER(PRINTK_MSGONLY))
4446 return trace_print_printk_msg_only(iter);
4447
4448 if (trace_flags & TRACE_ITER(BIN))
4449 return print_bin_fmt(iter);
4450
4451 if (trace_flags & TRACE_ITER(HEX))
4452 return print_hex_fmt(iter);
4453
4454 if (trace_flags & TRACE_ITER(RAW))
4455 return print_raw_fmt(iter);
4456
4457 return print_trace_fmt(iter);
4458 }
4459
4460 void trace_latency_header(struct seq_file *m)
4461 {
4462 struct trace_iterator *iter = m->private;
4463 struct trace_array *tr = iter->tr;
4464
4465 /* print nothing if the buffers are empty */
4466 if (trace_empty(iter))
4467 return;
4468
4469 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4470 print_trace_header(m, iter);
4471
4472 if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
4473 print_lat_help_header(m);
4474 }
4475
4476 void trace_default_header(struct seq_file *m)
4477 {
4478 struct trace_iterator *iter = m->private;
4479 struct trace_array *tr = iter->tr;
4480 unsigned long trace_flags = tr->trace_flags;
4481
4482 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
4483 return;
4484
4485 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4486 /* print nothing if the buffers are empty */
4487 if (trace_empty(iter))
4488 return;
4489 print_trace_header(m, iter);
4490 if (!(trace_flags & TRACE_ITER(VERBOSE)))
4491 print_lat_help_header(m);
4492 } else {
4493 if (!(trace_flags & TRACE_ITER(VERBOSE))) {
4494 if (trace_flags & TRACE_ITER(IRQ_INFO))
4495 print_func_help_header_irq(iter->array_buffer,
4496 m, trace_flags);
4497 else
4498 print_func_help_header(iter->array_buffer, m,
4499 trace_flags);
4500 }
4501 }
4502 }
4503
4504 static void test_ftrace_alive(struct seq_file *m)
4505 {
4506 if (!ftrace_is_dead())
4507 return;
4508 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4509 "# MAY BE MISSING FUNCTION EVENTS\n");
4510 }
4511
4512 #ifdef CONFIG_TRACER_MAX_TRACE
4513 static void show_snapshot_main_help(struct seq_file *m)
4514 {
4515 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4516 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4517 "# Takes a snapshot of the main buffer.\n"
4518 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4519 "# (Doesn't have to be '2' works with any number that\n"
4520 "# is not a '0' or '1')\n");
4521 }
4522
4523 static void show_snapshot_percpu_help(struct seq_file *m)
4524 {
4525 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4526 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4527 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4528 "# Takes a snapshot of the main buffer for this cpu.\n");
4529 #else
4530 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4531 "# Must use main snapshot file to allocate.\n");
4532 #endif
4533 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4534 "# (Doesn't have to be '2' works with any number that\n"
4535 "# is not a '0' or '1')\n");
4536 }
4537
4538 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4539 {
4540 if (iter->tr->allocated_snapshot)
4541 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4542 else
4543 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4544
4545 seq_puts(m, "# Snapshot commands:\n");
4546 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4547 show_snapshot_main_help(m);
4548 else
4549 show_snapshot_percpu_help(m);
4550 }
4551 #else
4552 /* Should never be called */
4553 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4554 #endif
4555
4556 static int s_show(struct seq_file *m, void *v)
4557 {
4558 struct trace_iterator *iter = v;
4559 int ret;
4560
4561 if (iter->ent == NULL) {
4562 if (iter->tr) {
4563 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4564 seq_puts(m, "#\n");
4565 test_ftrace_alive(m);
4566 }
4567 if (iter->snapshot && trace_empty(iter))
4568 print_snapshot_help(m, iter);
4569 else if (iter->trace && iter->trace->print_header)
4570 iter->trace->print_header(m);
4571 else
4572 trace_default_header(m);
4573
4574 } else if (iter->leftover) {
4575 /*
4576 * If we filled the seq_file buffer earlier, we
4577 * want to just show it now.
4578 */
4579 ret = trace_print_seq(m, &iter->seq);
4580
4581 /* ret should this time be zero, but you never know */
4582 iter->leftover = ret;
4583
4584 } else {
4585 ret = print_trace_line(iter);
4586 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4587 iter->seq.full = 0;
4588 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4589 }
4590 ret = trace_print_seq(m, &iter->seq);
4591 /*
4592 * If we overflow the seq_file buffer, then it will
4593 * ask us for this data again at start up.
4594 * Use that instead.
4595 * ret is 0 if seq_file write succeeded.
4596 * -1 otherwise.
4597 */
4598 iter->leftover = ret;
4599 }
4600
4601 return 0;
4602 }
4603
4604 /*
4605 * Should be used after trace_array_get(), trace_types_lock
4606 * ensures that i_cdev was already initialized.
4607 */
4608 static inline int tracing_get_cpu(struct inode *inode)
4609 {
4610 if (inode->i_cdev) /* See trace_create_cpu_file() */
4611 return (long)inode->i_cdev - 1;
4612 return RING_BUFFER_ALL_CPUS;
4613 }
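
/*
 * Illustrative example of the encoding used above: trace_create_cpu_file()
 * stores cpu + 1 in i_cdev, so a NULL i_cdev means "all CPUs"
 * (RING_BUFFER_ALL_CPUS) while, for instance, i_cdev == 3 selects the
 * per-CPU file of CPU 2.
 */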
4614
4615 static const struct seq_operations tracer_seq_ops = {
4616 .start = s_start,
4617 .next = s_next,
4618 .stop = s_stop,
4619 .show = s_show,
4620 };
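
/*
 * These callbacks implement the standard seq_file protocol for reading the
 * "trace" file (rough sketch): seq_read() calls s_start() to take the locks
 * and position the iterator at *pos, then alternates s_show() and s_next()
 * until the user buffer fills or the trace runs out of entries, and finally
 * calls s_stop() to release the locks taken in s_start().
 */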
4621
4622 /*
4623 * Note, as iter itself can be allocated and freed in different
4624 * ways, this function is only used to free its content, and not
4625 * the iterator itself. The only requirement on all the allocations
4626 * is that they must zero all fields (kzalloc), as freeing works with
4627 * either allocated content or NULL.
4628 */
4629 static void free_trace_iter_content(struct trace_iterator *iter)
4630 {
4631 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4632 if (iter->fmt != static_fmt_buf)
4633 kfree(iter->fmt);
4634
4635 kfree(iter->temp);
4636 kfree(iter->buffer_iter);
4637 mutex_destroy(&iter->mutex);
4638 free_cpumask_var(iter->started);
4639 }
4640
4641 static struct trace_iterator *
4642 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4643 {
4644 struct trace_array *tr = inode->i_private;
4645 struct trace_iterator *iter;
4646 int cpu;
4647
4648 if (tracing_disabled)
4649 return ERR_PTR(-ENODEV);
4650
4651 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4652 if (!iter)
4653 return ERR_PTR(-ENOMEM);
4654
4655 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4656 GFP_KERNEL);
4657 if (!iter->buffer_iter)
4658 goto release;
4659
4660 /*
4661 * trace_find_next_entry() may need to save off iter->ent.
4662 * It will place it into the iter->temp buffer. As most
4663 * events are less than 128 bytes, allocate a buffer of that size.
4664 * If one is greater, then trace_find_next_entry() will
4665 * allocate a new buffer to adjust for the bigger iter->ent.
4666 * It's not critical if it fails to get allocated here.
4667 */
4668 iter->temp = kmalloc(128, GFP_KERNEL);
4669 if (iter->temp)
4670 iter->temp_size = 128;
4671
4672 /*
4673 * trace_event_printf() may need to modify the given format
4674 * string to replace %p with %px so that it shows the real address
4675 * instead of a hash value. However, that is only for event
4676 * tracing; other tracers may not need it. Defer the allocation
4677 * until it is needed.
4678 */
4679 iter->fmt = NULL;
4680 iter->fmt_size = 0;
4681
4682 mutex_lock(&trace_types_lock);
4683 iter->trace = tr->current_trace;
4684
4685 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4686 goto fail;
4687
4688 iter->tr = tr;
4689
4690 #ifdef CONFIG_TRACER_MAX_TRACE
4691 /* Currently only the top directory has a snapshot */
4692 if (tr->current_trace->print_max || snapshot)
4693 iter->array_buffer = &tr->max_buffer;
4694 else
4695 #endif
4696 iter->array_buffer = &tr->array_buffer;
4697 iter->snapshot = snapshot;
4698 iter->pos = -1;
4699 iter->cpu_file = tracing_get_cpu(inode);
4700 mutex_init(&iter->mutex);
4701
4702 /* Notify the tracer early; before we stop tracing. */
4703 if (iter->trace->open)
4704 iter->trace->open(iter);
4705
4706 /* Annotate start of buffers if we had overruns */
4707 if (ring_buffer_overruns(iter->array_buffer->buffer))
4708 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4709
4710 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4711 if (trace_clocks[tr->clock_id].in_ns)
4712 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4713
4714 /*
4715 * If pause-on-trace is enabled, then stop the trace while
4716 * dumping, unless this is the "snapshot" file
4717 */
4718 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
4719 iter->iter_flags |= TRACE_FILE_PAUSE;
4720 tracing_stop_tr(tr);
4721 }
4722
4723 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4724 for_each_tracing_cpu(cpu) {
4725 iter->buffer_iter[cpu] =
4726 ring_buffer_read_start(iter->array_buffer->buffer,
4727 cpu, GFP_KERNEL);
4728 tracing_iter_reset(iter, cpu);
4729 }
4730 } else {
4731 cpu = iter->cpu_file;
4732 iter->buffer_iter[cpu] =
4733 ring_buffer_read_start(iter->array_buffer->buffer,
4734 cpu, GFP_KERNEL);
4735 tracing_iter_reset(iter, cpu);
4736 }
4737
4738 mutex_unlock(&trace_types_lock);
4739
4740 return iter;
4741
4742 fail:
4743 mutex_unlock(&trace_types_lock);
4744 free_trace_iter_content(iter);
4745 release:
4746 seq_release_private(inode, file);
4747 return ERR_PTR(-ENOMEM);
4748 }
4749
4750 int tracing_open_generic(struct inode *inode, struct file *filp)
4751 {
4752 int ret;
4753
4754 ret = tracing_check_open_get_tr(NULL);
4755 if (ret)
4756 return ret;
4757
4758 filp->private_data = inode->i_private;
4759 return 0;
4760 }
4761
4762 bool tracing_is_disabled(void)
4763 {
4764 return (tracing_disabled) ? true : false;
4765 }
4766
4767 /*
4768 * Open and update trace_array ref count.
4769 * Must have the current trace_array passed to it.
4770 */
4771 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4772 {
4773 struct trace_array *tr = inode->i_private;
4774 int ret;
4775
4776 ret = tracing_check_open_get_tr(tr);
4777 if (ret)
4778 return ret;
4779
4780 filp->private_data = inode->i_private;
4781
4782 return 0;
4783 }
4784
4785 /*
4786 * The private pointer of the inode is the trace_event_file.
4787 * Update the tr ref count associated with it.
4788 */
4789 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4790 {
4791 struct trace_event_file *file = inode->i_private;
4792 int ret;
4793
4794 ret = tracing_check_open_get_tr(file->tr);
4795 if (ret)
4796 return ret;
4797
4798 guard(mutex)(&event_mutex);
4799
4800 /* Fail if the file is marked for removal */
4801 if (file->flags & EVENT_FILE_FL_FREED) {
4802 trace_array_put(file->tr);
4803 return -ENODEV;
4804 } else {
4805 event_file_get(file);
4806 }
4807
4808 filp->private_data = inode->i_private;
4809
4810 return 0;
4811 }
4812
4813 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4814 {
4815 struct trace_event_file *file = inode->i_private;
4816
4817 trace_array_put(file->tr);
4818 event_file_put(file);
4819
4820 return 0;
4821 }
4822
4823 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4824 {
4825 tracing_release_file_tr(inode, filp);
4826 return single_release(inode, filp);
4827 }
4828
4829 static int tracing_release(struct inode *inode, struct file *file)
4830 {
4831 struct trace_array *tr = inode->i_private;
4832 struct seq_file *m = file->private_data;
4833 struct trace_iterator *iter;
4834 int cpu;
4835
4836 if (!(file->f_mode & FMODE_READ)) {
4837 trace_array_put(tr);
4838 return 0;
4839 }
4840
4841 /* Writes do not use seq_file */
4842 iter = m->private;
4843 mutex_lock(&trace_types_lock);
4844
4845 for_each_tracing_cpu(cpu) {
4846 if (iter->buffer_iter[cpu])
4847 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4848 }
4849
4850 if (iter->trace && iter->trace->close)
4851 iter->trace->close(iter);
4852
4853 if (iter->iter_flags & TRACE_FILE_PAUSE)
4854 /* reenable tracing if it was previously enabled */
4855 tracing_start_tr(tr);
4856
4857 __trace_array_put(tr);
4858
4859 mutex_unlock(&trace_types_lock);
4860
4861 free_trace_iter_content(iter);
4862 seq_release_private(inode, file);
4863
4864 return 0;
4865 }
4866
4867 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4868 {
4869 struct trace_array *tr = inode->i_private;
4870
4871 trace_array_put(tr);
4872 return 0;
4873 }
4874
4875 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4876 {
4877 struct trace_array *tr = inode->i_private;
4878
4879 trace_array_put(tr);
4880
4881 return single_release(inode, file);
4882 }
4883
4884 static int tracing_open(struct inode *inode, struct file *file)
4885 {
4886 struct trace_array *tr = inode->i_private;
4887 struct trace_iterator *iter;
4888 int ret;
4889
4890 ret = tracing_check_open_get_tr(tr);
4891 if (ret)
4892 return ret;
4893
4894 /* If this file was open for write, then erase contents */
4895 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4896 int cpu = tracing_get_cpu(inode);
4897 struct array_buffer *trace_buf = &tr->array_buffer;
4898
4899 #ifdef CONFIG_TRACER_MAX_TRACE
4900 if (tr->current_trace->print_max)
4901 trace_buf = &tr->max_buffer;
4902 #endif
4903
4904 if (cpu == RING_BUFFER_ALL_CPUS)
4905 tracing_reset_online_cpus(trace_buf);
4906 else
4907 tracing_reset_cpu(trace_buf, cpu);
4908 }
4909
4910 if (file->f_mode & FMODE_READ) {
4911 iter = __tracing_open(inode, file, false);
4912 if (IS_ERR(iter))
4913 ret = PTR_ERR(iter);
4914 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
4915 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4916 }
4917
4918 if (ret < 0)
4919 trace_array_put(tr);
4920
4921 return ret;
4922 }
4923
4924 /*
4925 * Some tracers are not suitable for instance buffers.
4926 * A tracer is always available for the global array (toplevel),
4927 * or for an instance if it explicitly states that it is.
4928 */
4929 static bool
4930 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4931 {
4932 #ifdef CONFIG_TRACER_SNAPSHOT
4933 /* arrays with mapped buffer range do not have snapshots */
4934 if (tr->range_addr_start && t->use_max_tr)
4935 return false;
4936 #endif
4937 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4938 }
4939
4940 /* Find the next tracer that this trace array may use */
4941 static struct tracer *
4942 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4943 {
4944 while (t && !trace_ok_for_array(t, tr))
4945 t = t->next;
4946
4947 return t;
4948 }
4949
4950 static void *
4951 t_next(struct seq_file *m, void *v, loff_t *pos)
4952 {
4953 struct trace_array *tr = m->private;
4954 struct tracer *t = v;
4955
4956 (*pos)++;
4957
4958 if (t)
4959 t = get_tracer_for_array(tr, t->next);
4960
4961 return t;
4962 }
4963
4964 static void *t_start(struct seq_file *m, loff_t *pos)
4965 {
4966 struct trace_array *tr = m->private;
4967 struct tracer *t;
4968 loff_t l = 0;
4969
4970 mutex_lock(&trace_types_lock);
4971
4972 t = get_tracer_for_array(tr, trace_types);
4973 for (; t && l < *pos; t = t_next(m, t, &l))
4974 ;
4975
4976 return t;
4977 }
4978
4979 static void t_stop(struct seq_file *m, void *p)
4980 {
4981 mutex_unlock(&trace_types_lock);
4982 }
4983
4984 static int t_show(struct seq_file *m, void *v)
4985 {
4986 struct tracer *t = v;
4987
4988 if (!t)
4989 return 0;
4990
4991 seq_puts(m, t->name);
4992 if (t->next)
4993 seq_putc(m, ' ');
4994 else
4995 seq_putc(m, '\n');
4996
4997 return 0;
4998 }
4999
5000 static const struct seq_operations show_traces_seq_ops = {
5001 .start = t_start,
5002 .next = t_next,
5003 .stop = t_stop,
5004 .show = t_show,
5005 };
5006
5007 static int show_traces_open(struct inode *inode, struct file *file)
5008 {
5009 struct trace_array *tr = inode->i_private;
5010 struct seq_file *m;
5011 int ret;
5012
5013 ret = tracing_check_open_get_tr(tr);
5014 if (ret)
5015 return ret;
5016
5017 ret = seq_open(file, &show_traces_seq_ops);
5018 if (ret) {
5019 trace_array_put(tr);
5020 return ret;
5021 }
5022
5023 m = file->private_data;
5024 m->private = tr;
5025
5026 return 0;
5027 }
5028
5029 static int tracing_seq_release(struct inode *inode, struct file *file)
5030 {
5031 struct trace_array *tr = inode->i_private;
5032
5033 trace_array_put(tr);
5034 return seq_release(inode, file);
5035 }
5036
5037 static ssize_t
5038 tracing_write_stub(struct file *filp, const char __user *ubuf,
5039 size_t count, loff_t *ppos)
5040 {
5041 return count;
5042 }
5043
5044 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5045 {
5046 int ret;
5047
5048 if (file->f_mode & FMODE_READ)
5049 ret = seq_lseek(file, offset, whence);
5050 else
5051 file->f_pos = ret = 0;
5052
5053 return ret;
5054 }
5055
5056 static const struct file_operations tracing_fops = {
5057 .open = tracing_open,
5058 .read = seq_read,
5059 .read_iter = seq_read_iter,
5060 .splice_read = copy_splice_read,
5061 .write = tracing_write_stub,
5062 .llseek = tracing_lseek,
5063 .release = tracing_release,
5064 };
5065
5066 static const struct file_operations show_traces_fops = {
5067 .open = show_traces_open,
5068 .read = seq_read,
5069 .llseek = seq_lseek,
5070 .release = tracing_seq_release,
5071 };
5072
5073 static ssize_t
5074 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5075 size_t count, loff_t *ppos)
5076 {
5077 struct trace_array *tr = file_inode(filp)->i_private;
5078 char *mask_str __free(kfree) = NULL;
5079 int len;
5080
5081 len = snprintf(NULL, 0, "%*pb\n",
5082 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5083 mask_str = kmalloc(len, GFP_KERNEL);
5084 if (!mask_str)
5085 return -ENOMEM;
5086
5087 len = snprintf(mask_str, len, "%*pb\n",
5088 cpumask_pr_args(tr->tracing_cpumask));
5089 if (len >= count)
5090 return -EINVAL;
5091
5092 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5093 }
5094
5095 int tracing_set_cpumask(struct trace_array *tr,
5096 cpumask_var_t tracing_cpumask_new)
5097 {
5098 int cpu;
5099
5100 if (!tr)
5101 return -EINVAL;
5102
5103 local_irq_disable();
5104 arch_spin_lock(&tr->max_lock);
5105 for_each_tracing_cpu(cpu) {
5106 /*
5107 * Increase/decrease the disabled counter if we are
5108 * about to flip a bit in the cpumask:
5109 */
5110 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5113 #ifdef CONFIG_TRACER_MAX_TRACE
5114 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5115 #endif
5116 }
5117 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5118 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5119 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5120 #ifdef CONFIG_TRACER_MAX_TRACE
5121 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5122 #endif
5123 }
5124 }
5125 arch_spin_unlock(&tr->max_lock);
5126 local_irq_enable();
5127
5128 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5129
5130 return 0;
5131 }
5132
5133 static ssize_t
5134 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5135 size_t count, loff_t *ppos)
5136 {
5137 struct trace_array *tr = file_inode(filp)->i_private;
5138 cpumask_var_t tracing_cpumask_new;
5139 int err;
5140
5141 if (count == 0 || count > KMALLOC_MAX_SIZE)
5142 return -EINVAL;
5143
5144 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5145 return -ENOMEM;
5146
5147 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5148 if (err)
5149 goto err_free;
5150
5151 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5152 if (err)
5153 goto err_free;
5154
5155 free_cpumask_var(tracing_cpumask_new);
5156
5157 return count;
5158
5159 err_free:
5160 free_cpumask_var(tracing_cpumask_new);
5161
5162 return err;
5163 }
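
/*
 * Usage example for the tracing_cpumask file (illustrative): the mask is
 * parsed by cpumask_parse_user() as a hexadecimal CPU mask, so
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1, while writing "ff" re-enables the
 * first eight CPUs. Reading the file back prints the current mask in the
 * same format.
 */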
5164
5165 static const struct file_operations tracing_cpumask_fops = {
5166 .open = tracing_open_generic_tr,
5167 .read = tracing_cpumask_read,
5168 .write = tracing_cpumask_write,
5169 .release = tracing_release_generic_tr,
5170 .llseek = generic_file_llseek,
5171 };
5172
5173 static int tracing_trace_options_show(struct seq_file *m, void *v)
5174 {
5175 struct tracer_opt *trace_opts;
5176 struct trace_array *tr = m->private;
5177 struct tracer_flags *flags;
5178 u32 tracer_flags;
5179 int i;
5180
5181 guard(mutex)(&trace_types_lock);
5182
5183 for (i = 0; trace_options[i]; i++) {
5184 if (tr->trace_flags & (1ULL << i))
5185 seq_printf(m, "%s\n", trace_options[i]);
5186 else
5187 seq_printf(m, "no%s\n", trace_options[i]);
5188 }
5189
5190 flags = tr->current_trace_flags;
5191 if (!flags || !flags->opts)
5192 return 0;
5193
5194 tracer_flags = flags->val;
5195 trace_opts = flags->opts;
5196
5197 for (i = 0; trace_opts[i].name; i++) {
5198 if (tracer_flags & trace_opts[i].bit)
5199 seq_printf(m, "%s\n", trace_opts[i].name);
5200 else
5201 seq_printf(m, "no%s\n", trace_opts[i].name);
5202 }
5203
5204 return 0;
5205 }
5206
5207 static int __set_tracer_option(struct trace_array *tr,
5208 struct tracer_flags *tracer_flags,
5209 struct tracer_opt *opts, int neg)
5210 {
5211 struct tracer *trace = tracer_flags->trace;
5212 int ret = 0;
5213
5214 if (trace->set_flag)
5215 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5216 if (ret)
5217 return ret;
5218
5219 if (neg)
5220 tracer_flags->val &= ~opts->bit;
5221 else
5222 tracer_flags->val |= opts->bit;
5223 return 0;
5224 }
5225
5226 /* Try to assign a tracer specific option */
5227 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5228 {
5229 struct tracer_flags *tracer_flags = tr->current_trace_flags;
5230 struct tracer_opt *opts = NULL;
5231 int i;
5232
5233 if (!tracer_flags || !tracer_flags->opts)
5234 return 0;
5235
5236 for (i = 0; tracer_flags->opts[i].name; i++) {
5237 opts = &tracer_flags->opts[i];
5238
5239 if (strcmp(cmp, opts->name) == 0)
5240 return __set_tracer_option(tr, tracer_flags, opts, neg);
5241 }
5242
5243 return -EINVAL;
5244 }
5245
5246 /* Some tracers require overwrite to stay enabled */
5247 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
5248 {
5249 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
5250 return -1;
5251
5252 return 0;
5253 }
5254
5255 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
5256 {
5257 switch (mask) {
5258 case TRACE_ITER(RECORD_TGID):
5259 case TRACE_ITER(RECORD_CMD):
5260 case TRACE_ITER(TRACE_PRINTK):
5261 case TRACE_ITER(COPY_MARKER):
5262 lockdep_assert_held(&event_mutex);
5263 }
5264
5265 /* do nothing if flag is already set */
5266 if (!!(tr->trace_flags & mask) == !!enabled)
5267 return 0;
5268
5269 /* Give the tracer a chance to approve the change */
5270 if (tr->current_trace->flag_changed)
5271 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5272 return -EINVAL;
5273
5274 switch (mask) {
5275 case TRACE_ITER(TRACE_PRINTK):
5276 if (enabled) {
5277 update_printk_trace(tr);
5278 } else {
5279 /*
5280 * The global_trace cannot clear this.
5281 * Its flag only gets cleared if another instance sets it.
5282 */
5283 if (printk_trace == &global_trace)
5284 return -EINVAL;
5285 /*
5286 * An instance must always have it set.
5287 * By default, that's the global_trace instance.
5288 */
5289 if (printk_trace == tr)
5290 update_printk_trace(&global_trace);
5291 }
5292 break;
5293
5294 case TRACE_ITER(COPY_MARKER):
5295 update_marker_trace(tr, enabled);
5296 /* update_marker_trace updates the tr->trace_flags */
5297 return 0;
5298 }
5299
5300 if (enabled)
5301 tr->trace_flags |= mask;
5302 else
5303 tr->trace_flags &= ~mask;
5304
5305 switch (mask) {
5306 case TRACE_ITER(RECORD_CMD):
5307 trace_event_enable_cmd_record(enabled);
5308 break;
5309
5310 case TRACE_ITER(RECORD_TGID):
5311
5312 if (trace_alloc_tgid_map() < 0) {
5313 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
5314 return -ENOMEM;
5315 }
5316
5317 trace_event_enable_tgid_record(enabled);
5318 break;
5319
5320 case TRACE_ITER(EVENT_FORK):
5321 trace_event_follow_fork(tr, enabled);
5322 break;
5323
5324 case TRACE_ITER(FUNC_FORK):
5325 ftrace_pid_follow_fork(tr, enabled);
5326 break;
5327
5328 case TRACE_ITER(OVERWRITE):
5329 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5330 #ifdef CONFIG_TRACER_MAX_TRACE
5331 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5332 #endif
5333 break;
5334
5335 case TRACE_ITER(PRINTK):
5336 trace_printk_start_stop_comm(enabled);
5337 trace_printk_control(enabled);
5338 break;
5339
5340 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
5341 case TRACE_GRAPH_GRAPH_TIME:
5342 ftrace_graph_graph_time_control(enabled);
5343 break;
5344 #endif
5345 }
5346
5347 return 0;
5348 }
5349
5350 int trace_set_options(struct trace_array *tr, char *option)
5351 {
5352 char *cmp;
5353 int neg = 0;
5354 int ret;
5355 size_t orig_len = strlen(option);
5356 int len;
5357
5358 cmp = strstrip(option);
5359
5360 len = str_has_prefix(cmp, "no");
5361 if (len)
5362 neg = 1;
5363
5364 cmp += len;
5365
5366 mutex_lock(&event_mutex);
5367 mutex_lock(&trace_types_lock);
5368
5369 ret = match_string(trace_options, -1, cmp);
5370 /* If no option could be set, test the specific tracer options */
5371 if (ret < 0)
5372 ret = set_tracer_option(tr, cmp, neg);
5373 else
5374 ret = set_tracer_flag(tr, 1ULL << ret, !neg);
5375
5376 mutex_unlock(&trace_types_lock);
5377 mutex_unlock(&event_mutex);
5378
5379 /*
5380 * If the first trailing whitespace is replaced with '\0' by strstrip,
5381 * turn it back into a space.
5382 */
5383 if (orig_len > strlen(option))
5384 option[strlen(option)] = ' ';
5385
5386 return ret;
5387 }
5388
5389 static void __init apply_trace_boot_options(void)
5390 {
5391 char *buf = trace_boot_options_buf;
5392 char *option;
5393
5394 while (true) {
5395 option = strsep(&buf, ",");
5396
5397 if (!option)
5398 break;
5399
5400 if (*option)
5401 trace_set_options(&global_trace, option);
5402
5403 /* Put back the comma to allow this to be called again */
5404 if (buf)
5405 *(buf - 1) = ',';
5406 }
5407 }
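
/*
 * Example (illustrative): booting with the kernel command line option
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * fills trace_boot_options_buf, and the loop above passes each
 * comma-separated token ("sym-addr", then "nooverwrite") to
 * trace_set_options() for the global trace instance.
 */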
5408
5409 static ssize_t
5410 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5411 size_t cnt, loff_t *ppos)
5412 {
5413 struct seq_file *m = filp->private_data;
5414 struct trace_array *tr = m->private;
5415 char buf[64];
5416 int ret;
5417
5418 if (cnt >= sizeof(buf))
5419 return -EINVAL;
5420
5421 if (copy_from_user(buf, ubuf, cnt))
5422 return -EFAULT;
5423
5424 buf[cnt] = 0;
5425
5426 ret = trace_set_options(tr, buf);
5427 if (ret < 0)
5428 return ret;
5429
5430 *ppos += cnt;
5431
5432 return cnt;
5433 }
5434
5435 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5436 {
5437 struct trace_array *tr = inode->i_private;
5438 int ret;
5439
5440 ret = tracing_check_open_get_tr(tr);
5441 if (ret)
5442 return ret;
5443
5444 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5445 if (ret < 0)
5446 trace_array_put(tr);
5447
5448 return ret;
5449 }
5450
5451 static const struct file_operations tracing_iter_fops = {
5452 .open = tracing_trace_options_open,
5453 .read = seq_read,
5454 .llseek = seq_lseek,
5455 .release = tracing_single_release_tr,
5456 .write = tracing_trace_options_write,
5457 };
5458
5459 static const char readme_msg[] =
5460 "tracing mini-HOWTO:\n\n"
5461 "By default tracefs removes all OTH file permission bits.\n"
5462 "When mounting tracefs an optional group id can be specified\n"
5463 "which adds the group to every directory and file in tracefs:\n\n"
5464 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5465 "# echo 0 > tracing_on : quick way to disable tracing\n"
5466 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5467 " Important files:\n"
5468 " trace\t\t\t- The static contents of the buffer\n"
5469 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5470 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5471 " current_tracer\t- function and latency tracers\n"
5472 " available_tracers\t- list of configured tracers for current_tracer\n"
5473 " error_log\t- error log for failed commands (that support it)\n"
5474 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5475 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5476 " trace_clock\t\t- change the clock used to order events\n"
5477 " local: Per cpu clock but may not be synced across CPUs\n"
5478 " global: Synced across CPUs but slows tracing down.\n"
5479 " counter: Not a clock, but just an increment\n"
5480 " uptime: Jiffy counter from time of boot\n"
5481 " perf: Same clock that perf events use\n"
5482 #ifdef CONFIG_X86_64
5483 " x86-tsc: TSC cycle counter\n"
5484 #endif
5485 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5486 " delta: Delta difference against a buffer-wide timestamp\n"
5487 " absolute: Absolute (standalone) timestamp\n"
5488 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5489 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5490 " tracing_cpumask\t- Limit which CPUs to trace\n"
5491 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5492 "\t\t\t Remove sub-buffer with rmdir\n"
5493 " trace_options\t\t- Set format or modify how tracing happens\n"
5494 "\t\t\t Disable an option by prefixing 'no' to the\n"
5495 "\t\t\t option name\n"
5496 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5497 #ifdef CONFIG_DYNAMIC_FTRACE
5498 "\n available_filter_functions - list of functions that can be filtered on\n"
5499 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5500 "\t\t\t functions\n"
5501 "\t accepts: func_full_name or glob-matching-pattern\n"
5502 "\t modules: Can select a group via module\n"
5503 "\t Format: :mod:<module-name>\n"
5504 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5505 "\t triggers: a command to perform when function is hit\n"
5506 "\t Format: <function>:<trigger>[:count]\n"
5507 "\t trigger: traceon, traceoff\n"
5508 "\t\t enable_event:<system>:<event>\n"
5509 "\t\t disable_event:<system>:<event>\n"
5510 #ifdef CONFIG_STACKTRACE
5511 "\t\t stacktrace\n"
5512 #endif
5513 #ifdef CONFIG_TRACER_SNAPSHOT
5514 "\t\t snapshot\n"
5515 #endif
5516 "\t\t dump\n"
5517 "\t\t cpudump\n"
5518 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5519 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5520 "\t The first one will disable tracing every time do_fault is hit\n"
5521 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5522 "\t The first time do trap is hit and it disables tracing, the\n"
5523 "\t counter will decrement to 2. If tracing is already disabled,\n"
5524 "\t the counter will not decrement. It only decrements when the\n"
5525 "\t trigger did work\n"
5526 "\t To remove trigger without count:\n"
5527 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5528 "\t To remove trigger with a count:\n"
5529 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5530 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5531 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5532 "\t modules: Can select a group via module command :mod:\n"
5533 "\t Does not accept triggers\n"
5534 #endif /* CONFIG_DYNAMIC_FTRACE */
5535 #ifdef CONFIG_FUNCTION_TRACER
5536 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5537 "\t\t (function)\n"
5538 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5539 "\t\t (function)\n"
5540 #endif
5541 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5542 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5543 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5544 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5545 #endif
5546 #ifdef CONFIG_TRACER_SNAPSHOT
5547 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5548 "\t\t\t snapshot buffer. Read the contents for more\n"
5549 "\t\t\t information\n"
5550 #endif
5551 #ifdef CONFIG_STACK_TRACER
5552 " stack_trace\t\t- Shows the max stack trace when active\n"
5553 " stack_max_size\t- Shows current max stack size that was traced\n"
5554 "\t\t\t Write into this file to reset the max size (trigger a\n"
5555 "\t\t\t new trace)\n"
5556 #ifdef CONFIG_DYNAMIC_FTRACE
5557 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5558 "\t\t\t traces\n"
5559 #endif
5560 #endif /* CONFIG_STACK_TRACER */
5561 #ifdef CONFIG_DYNAMIC_EVENTS
5562 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5563 "\t\t\t Write into this file to define/undefine new trace events.\n"
5564 #endif
5565 #ifdef CONFIG_KPROBE_EVENTS
5566 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5567 "\t\t\t Write into this file to define/undefine new trace events.\n"
5568 #endif
5569 #ifdef CONFIG_UPROBE_EVENTS
5570 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5571 "\t\t\t Write into this file to define/undefine new trace events.\n"
5572 #endif
5573 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5574 defined(CONFIG_FPROBE_EVENTS)
5575 "\t accepts: event-definitions (one definition per line)\n"
5576 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5577 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5578 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5579 #endif
5580 #ifdef CONFIG_FPROBE_EVENTS
5581 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5582 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5583 #endif
5584 #ifdef CONFIG_HIST_TRIGGERS
5585 "\t s:[synthetic/]<event> <field> [<field>]\n"
5586 #endif
5587 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5588 "\t -:[<group>/][<event>]\n"
5589 #ifdef CONFIG_KPROBE_EVENTS
5590 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5591 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5592 #endif
5593 #ifdef CONFIG_UPROBE_EVENTS
5594 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5595 #endif
5596 "\t args: <name>=fetcharg[:type]\n"
5597 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5598 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5599 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5600 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5601 "\t <argname>[->field[->field|.field...]],\n"
5602 #endif
5603 #else
5604 "\t $stack<index>, $stack, $retval, $comm,\n"
5605 #endif
5606 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5607 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5608 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5609 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5610 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5611 #ifdef CONFIG_HIST_TRIGGERS
5612 "\t field: <stype> <name>;\n"
5613 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5614 "\t [unsigned] char/int/long\n"
5615 #endif
5616 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5617 "\t of the <attached-group>/<attached-event>.\n"
5618 #endif
5619 " set_event\t\t- Enables events by name written into it\n"
5620 "\t\t\t Can enable module events via: :mod:<module>\n"
5621 " events/\t\t- Directory containing all trace event subsystems:\n"
5622 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5623 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5624 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5625 "\t\t\t events\n"
5626 " filter\t\t- If set, only events passing filter are traced\n"
5627 " events/<system>/<event>/\t- Directory containing control files for\n"
5628 "\t\t\t <event>:\n"
5629 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5630 " filter\t\t- If set, only events passing filter are traced\n"
5631 " trigger\t\t- If set, a command to perform when event is hit\n"
5632 "\t Format: <trigger>[:count][if <filter>]\n"
5633 "\t trigger: traceon, traceoff\n"
5634 "\t enable_event:<system>:<event>\n"
5635 "\t disable_event:<system>:<event>\n"
5636 #ifdef CONFIG_HIST_TRIGGERS
5637 "\t enable_hist:<system>:<event>\n"
5638 "\t disable_hist:<system>:<event>\n"
5639 #endif
5640 #ifdef CONFIG_STACKTRACE
5641 "\t\t stacktrace\n"
5642 #endif
5643 #ifdef CONFIG_TRACER_SNAPSHOT
5644 "\t\t snapshot\n"
5645 #endif
5646 #ifdef CONFIG_HIST_TRIGGERS
5647 "\t\t hist (see below)\n"
5648 #endif
5649 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5650 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5651 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5652 "\t events/block/block_unplug/trigger\n"
5653 "\t The first disables tracing every time block_unplug is hit.\n"
5654 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5655 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5656 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5657 "\t Like function triggers, the counter is only decremented if it\n"
5658 "\t enabled or disabled tracing.\n"
5659 "\t To remove a trigger without a count:\n"
5660 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5661 "\t To remove a trigger with a count:\n"
5662 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5663 "\t Filters can be ignored when removing a trigger.\n"
5664 #ifdef CONFIG_HIST_TRIGGERS
5665 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5666 "\t Format: hist:keys=<field1[,field2,...]>\n"
5667 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5668 "\t [:values=<field1[,field2,...]>]\n"
5669 "\t [:sort=<field1[,field2,...]>]\n"
5670 "\t [:size=#entries]\n"
5671 "\t [:pause][:continue][:clear]\n"
5672 "\t [:name=histname1]\n"
5673 "\t [:nohitcount]\n"
5674 "\t [:<handler>.<action>]\n"
5675 "\t [if <filter>]\n\n"
5676 "\t Note, special fields can be used as well:\n"
5677 "\t common_timestamp - to record current timestamp\n"
5678 "\t common_cpu - to record the CPU the event happened on\n"
5679 "\n"
5680 "\t A hist trigger variable can be:\n"
5681 "\t - a reference to a field e.g. x=current_timestamp,\n"
5682 "\t - a reference to another variable e.g. y=$x,\n"
5683 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5684 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5685 "\n"
5686 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5687 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5688 "\t variable reference, field or numeric literal.\n"
5689 "\n"
5690 "\t When a matching event is hit, an entry is added to a hash\n"
5691 "\t table using the key(s) and value(s) named, and the value of a\n"
5692 "\t sum called 'hitcount' is incremented. Keys and values\n"
5693 "\t correspond to fields in the event's format description. Keys\n"
5694 "\t can be any field, or the special string 'common_stacktrace'.\n"
5695 "\t Compound keys consisting of up to two fields can be specified\n"
5696 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5697 "\t fields. Sort keys consisting of up to two fields can be\n"
5698 "\t specified using the 'sort' keyword. The sort direction can\n"
5699 "\t be modified by appending '.descending' or '.ascending' to a\n"
5700 "\t sort field. The 'size' parameter can be used to specify more\n"
5701 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5702 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5703 "\t its histogram data will be shared with other triggers of the\n"
5704 "\t same name, and trigger hits will update this common data.\n\n"
5705 "\t Reading the 'hist' file for the event will dump the hash\n"
5706 "\t table in its entirety to stdout. If there are multiple hist\n"
5707 "\t triggers attached to an event, there will be a table for each\n"
5708 "\t trigger in the output. The table displayed for a named\n"
5709 "\t trigger will be the same as any other instance having the\n"
5710 "\t same name. The default format used to display a given field\n"
5711 "\t can be modified by appending any of the following modifiers\n"
5712 "\t to the field name, as applicable:\n\n"
5713 "\t .hex display a number as a hex value\n"
5714 "\t .sym display an address as a symbol\n"
5715 "\t .sym-offset display an address as a symbol and offset\n"
5716 "\t .execname display a common_pid as a program name\n"
5717 "\t .syscall display a syscall id as a syscall name\n"
5718 "\t .log2 display log2 value rather than raw number\n"
5719 "\t .buckets=size display values in groups of size rather than raw number\n"
5720 "\t .usecs display a common_timestamp in microseconds\n"
5721 "\t .percent display a number of percentage value\n"
5722 "\t .graph display a bar-graph of a value\n\n"
5723 "\t The 'pause' parameter can be used to pause an existing hist\n"
5724 "\t trigger or to start a hist trigger but not log any events\n"
5725 "\t until told to do so. 'continue' can be used to start or\n"
5726 "\t restart a paused hist trigger.\n\n"
5727 "\t The 'clear' parameter will clear the contents of a running\n"
5728 "\t hist trigger and leave its current paused/active state\n"
5729 "\t unchanged.\n\n"
5730 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5731 "\t raw hitcount in the histogram.\n\n"
5732 "\t The enable_hist and disable_hist triggers can be used to\n"
5733 "\t have one event conditionally start and stop another event's\n"
5734 "\t already-attached hist trigger. The syntax is analogous to\n"
5735 "\t the enable_event and disable_event triggers.\n\n"
5736 "\t Hist trigger handlers and actions are executed whenever a\n"
5737 "\t a histogram entry is added or updated. They take the form:\n\n"
5738 "\t <handler>.<action>\n\n"
5739 "\t The available handlers are:\n\n"
5740 "\t onmatch(matching.event) - invoke on addition or update\n"
5741 "\t onmax(var) - invoke if var exceeds current max\n"
5742 "\t onchange(var) - invoke action if var changes\n\n"
5743 "\t The available actions are:\n\n"
5744 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5745 "\t save(field,...) - save current event fields\n"
5746 #ifdef CONFIG_TRACER_SNAPSHOT
5747 "\t snapshot() - snapshot the trace buffer\n\n"
5748 #endif
5749 #ifdef CONFIG_SYNTH_EVENTS
5750 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5751 "\t Write into this file to define/undefine new synthetic events.\n"
5752 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5753 #endif
5754 #endif
5755 ;
5756
5757 static ssize_t
5758 tracing_readme_read(struct file *filp, char __user *ubuf,
5759 size_t cnt, loff_t *ppos)
5760 {
5761 return simple_read_from_buffer(ubuf, cnt, ppos,
5762 readme_msg, strlen(readme_msg));
5763 }
5764
5765 static const struct file_operations tracing_readme_fops = {
5766 .open = tracing_open_generic,
5767 .read = tracing_readme_read,
5768 .llseek = generic_file_llseek,
5769 };
5770
5771 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5772 static union trace_eval_map_item *
5773 update_eval_map(union trace_eval_map_item *ptr)
5774 {
5775 if (!ptr->map.eval_string) {
5776 if (ptr->tail.next) {
5777 ptr = ptr->tail.next;
5778 /* Set ptr to the next real item (skip head) */
5779 ptr++;
5780 } else
5781 return NULL;
5782 }
5783 return ptr;
5784 }
5785
5786 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5787 {
5788 union trace_eval_map_item *ptr = v;
5789
5790 /*
5791 * Paranoid! If ptr points to end, we don't want to increment past it.
5792 * This really should never happen.
5793 */
5794 (*pos)++;
5795 ptr = update_eval_map(ptr);
5796 if (WARN_ON_ONCE(!ptr))
5797 return NULL;
5798
5799 ptr++;
5800 ptr = update_eval_map(ptr);
5801
5802 return ptr;
5803 }
5804
5805 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5806 {
5807 union trace_eval_map_item *v;
5808 loff_t l = 0;
5809
5810 mutex_lock(&trace_eval_mutex);
5811
5812 v = trace_eval_maps;
5813 if (v)
5814 v++;
5815
5816 while (v && l < *pos) {
5817 v = eval_map_next(m, v, &l);
5818 }
5819
5820 return v;
5821 }
5822
5823 static void eval_map_stop(struct seq_file *m, void *v)
5824 {
5825 mutex_unlock(&trace_eval_mutex);
5826 }
5827
5828 static int eval_map_show(struct seq_file *m, void *v)
5829 {
5830 union trace_eval_map_item *ptr = v;
5831
5832 seq_printf(m, "%s %ld (%s)\n",
5833 ptr->map.eval_string, ptr->map.eval_value,
5834 ptr->map.system);
5835
5836 return 0;
5837 }
5838
5839 static const struct seq_operations tracing_eval_map_seq_ops = {
5840 .start = eval_map_start,
5841 .next = eval_map_next,
5842 .stop = eval_map_stop,
5843 .show = eval_map_show,
5844 };
5845
5846 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5847 {
5848 int ret;
5849
5850 ret = tracing_check_open_get_tr(NULL);
5851 if (ret)
5852 return ret;
5853
5854 return seq_open(filp, &tracing_eval_map_seq_ops);
5855 }
5856
5857 static const struct file_operations tracing_eval_map_fops = {
5858 .open = tracing_eval_map_open,
5859 .read = seq_read,
5860 .llseek = seq_lseek,
5861 .release = seq_release,
5862 };
5863
5864 static inline union trace_eval_map_item *
5865 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5866 {
5867 /* Return tail of array given the head */
5868 return ptr + ptr->head.length + 1;
5869 }
5870
5871 static void
5872 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5873 int len)
5874 {
5875 struct trace_eval_map **stop;
5876 struct trace_eval_map **map;
5877 union trace_eval_map_item *map_array;
5878 union trace_eval_map_item *ptr;
5879
5880 stop = start + len;
5881
5882 /*
5883 * The trace_eval_maps contains the map plus a head and tail item,
5884 * where the head holds the module and length of array, and the
5885 * tail holds a pointer to the next list.
5886 */
5887 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5888 if (!map_array) {
5889 pr_warn("Unable to allocate trace eval mapping\n");
5890 return;
5891 }
5892
5893 guard(mutex)(&trace_eval_mutex);
5894
5895 if (!trace_eval_maps)
5896 trace_eval_maps = map_array;
5897 else {
5898 ptr = trace_eval_maps;
5899 for (;;) {
5900 ptr = trace_eval_jmp_to_tail(ptr);
5901 if (!ptr->tail.next)
5902 break;
5903 ptr = ptr->tail.next;
5904
5905 }
5906 ptr->tail.next = map_array;
5907 }
5908 map_array->head.mod = mod;
5909 map_array->head.length = len;
5910 map_array++;
5911
5912 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5913 map_array->map = **map;
5914 map_array++;
5915 }
5916 memset(map_array, 0, sizeof(*map_array));
5917 }
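/*
 * Resulting layout for a module that provides three eval maps (the
 * head and tail are the two extra items allocated above):
 *
 *   map_array[0]  head { .mod = mod, .length = 3 }
 *   map_array[1]  map  { eval_string, eval_value, system }
 *   map_array[2]  map
 *   map_array[3]  map
 *   map_array[4]  tail { .next = next module's array or NULL }
 *
 * trace_eval_jmp_to_tail() depends on this: head + length + 1 == tail.
 */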
5918
5919 static void trace_create_eval_file(struct dentry *d_tracer)
5920 {
5921 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5922 NULL, &tracing_eval_map_fops);
5923 }
5924
5925 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5926 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5927 static inline void trace_insert_eval_map_file(struct module *mod,
5928 struct trace_eval_map **start, int len) { }
5929 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5930
5931 static void
5932 trace_event_update_with_eval_map(struct module *mod,
5933 struct trace_eval_map **start,
5934 int len)
5935 {
5936 struct trace_eval_map **map;
5937
5938 /* Always run sanitizer only if btf_type_tag attr exists. */
5939 if (len <= 0) {
5940 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5941 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5942 __has_attribute(btf_type_tag)))
5943 return;
5944 }
5945
5946 map = start;
5947
5948 trace_event_update_all(map, len);
5949
5950 if (len <= 0)
5951 return;
5952
5953 trace_insert_eval_map_file(mod, start, len);
5954 }
5955
5956 static ssize_t
5957 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5958 size_t cnt, loff_t *ppos)
5959 {
5960 struct trace_array *tr = filp->private_data;
5961 char buf[MAX_TRACER_SIZE+2];
5962 int r;
5963
5964 scoped_guard(mutex, &trace_types_lock) {
5965 r = sprintf(buf, "%s\n", tr->current_trace->name);
5966 }
5967
5968 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5969 }
5970
5971 int tracer_init(struct tracer *t, struct trace_array *tr)
5972 {
5973 tracing_reset_online_cpus(&tr->array_buffer);
5974 return t->init(tr);
5975 }
5976
5977 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5978 {
5979 int cpu;
5980
5981 for_each_tracing_cpu(cpu)
5982 per_cpu_ptr(buf->data, cpu)->entries = val;
5983 }
5984
5985 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5986 {
5987 if (cpu == RING_BUFFER_ALL_CPUS) {
5988 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5989 } else {
5990 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5991 }
5992 }
5993
5994 #ifdef CONFIG_TRACER_MAX_TRACE
5995 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5996 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5997 struct array_buffer *size_buf, int cpu_id)
5998 {
5999 int cpu, ret = 0;
6000
6001 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6002 for_each_tracing_cpu(cpu) {
6003 ret = ring_buffer_resize(trace_buf->buffer,
6004 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6005 if (ret < 0)
6006 break;
6007 per_cpu_ptr(trace_buf->data, cpu)->entries =
6008 per_cpu_ptr(size_buf->data, cpu)->entries;
6009 }
6010 } else {
6011 ret = ring_buffer_resize(trace_buf->buffer,
6012 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6013 if (ret == 0)
6014 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6015 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6016 }
6017
6018 return ret;
6019 }
6020 #endif /* CONFIG_TRACER_MAX_TRACE */
6021
6022 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6023 unsigned long size, int cpu)
6024 {
6025 int ret;
6026
6027 /*
6028 * If kernel or user changes the size of the ring buffer
6029 * we use the size that was given, and we can forget about
6030 * expanding it later.
6031 */
6032 trace_set_ring_buffer_expanded(tr);
6033
6034 /* May be called before buffers are initialized */
6035 if (!tr->array_buffer.buffer)
6036 return 0;
6037
6038 /* Do not allow tracing while resizing ring buffer */
6039 tracing_stop_tr(tr);
6040
6041 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6042 if (ret < 0)
6043 goto out_start;
6044
6045 #ifdef CONFIG_TRACER_MAX_TRACE
6046 if (!tr->allocated_snapshot)
6047 goto out;
6048
6049 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6050 if (ret < 0) {
6051 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6052 &tr->array_buffer, cpu);
6053 if (r < 0) {
6054 /*
6055 * AARGH! We are left with different
6056 * size max buffer!!!!
6057 * The max buffer is our "snapshot" buffer.
6058 * When a tracer needs a snapshot (one of the
6059 * latency tracers), it swaps the max buffer
6060 * with the saved snapshot. We succeeded in updating
6061 * the size of the main buffer, but failed to
6062 * update the size of the max buffer. But when we tried
6063 * to reset the main buffer to the original size, we
6064 * failed there too. This is very unlikely to
6065 * happen, but if it does, warn and kill all
6066 * tracing.
6067 */
6068 WARN_ON(1);
6069 tracing_disabled = 1;
6070 }
6071 goto out_start;
6072 }
6073
6074 update_buffer_entries(&tr->max_buffer, cpu);
6075
6076 out:
6077 #endif /* CONFIG_TRACER_MAX_TRACE */
6078
6079 update_buffer_entries(&tr->array_buffer, cpu);
6080 out_start:
6081 tracing_start_tr(tr);
6082 return ret;
6083 }
6084
6085 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6086 unsigned long size, int cpu_id)
6087 {
6088 guard(mutex)(&trace_types_lock);
6089
6090 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6091 /* make sure this CPU is enabled in the mask */
6092 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6093 return -EINVAL;
6094 }
6095
6096 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6097 }
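/*
 * This is the entry point used when resizing from user space, e.g. a
 * write to buffer_size_kb (all CPUs) or per_cpu/cpuN/buffer_size_kb
 * (one CPU); tracing_entries_write() below converts the value from KB
 * before calling in here.
 */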
6098
6099 struct trace_mod_entry {
6100 unsigned long mod_addr;
6101 char mod_name[MODULE_NAME_LEN];
6102 };
6103
6104 struct trace_scratch {
6105 unsigned int clock_id;
6106 unsigned long text_addr;
6107 unsigned long nr_entries;
6108 struct trace_mod_entry entries[];
6109 };
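/*
 * The scratch area is used by persistent ("previous boot") instances to
 * remember where the kernel and each module were loaded when the trace
 * was recorded, e.g.:
 *
 *   clock_id   = <trace clock in use>
 *   text_addr  = <_text of the recording boot>
 *   nr_entries = 2
 *   entries[0] = { mod_addr = <module A text base>, mod_name = "A" }
 *   entries[1] = { mod_addr = <module B text base>, mod_name = "B" }
 *
 * trace_adjust_address() below uses this to translate saved addresses
 * into the current boot's layout.
 */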
6110
6111 static DEFINE_MUTEX(scratch_mutex);
6112
6113 static int cmp_mod_entry(const void *key, const void *pivot)
6114 {
6115 unsigned long addr = (unsigned long)key;
6116 const struct trace_mod_entry *ent = pivot;
6117
6118 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6119 return 0;
6120 else
6121 return addr - ent->mod_addr;
6122 }
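/*
 * The comparator treats two consecutive entries as an address range.
 * With (illustrative) sorted entries:
 *
 *   entries[0].mod_addr = 0xffffffffc0000000   module A
 *   entries[1].mod_addr = 0xffffffffc0100000   module B
 *
 * an address of 0xffffffffc0050000 compares equal to entries[0], so the
 * bsearch in trace_adjust_address() returns the module containing it.
 * Only the first nr_entries - 1 items are searched this way; addresses
 * at or above the last entry are handled separately by the caller.
 */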
6123
6124 /**
6125 * trace_adjust_address() - Adjust prev boot address to current address.
6126 * @tr: Persistent ring buffer's trace_array.
6127 * @addr: Address in @tr which is adjusted.
6128 */
6129 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6130 {
6131 struct trace_module_delta *module_delta;
6132 struct trace_scratch *tscratch;
6133 struct trace_mod_entry *entry;
6134 unsigned long raddr;
6135 int idx = 0, nr_entries;
6136
6137 /* If we don't have last boot delta, return the address */
6138 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6139 return addr;
6140
6141 /* tr->module_delta must be protected by rcu. */
6142 guard(rcu)();
6143 tscratch = tr->scratch;
6144 /* if there is no tscratch, module_delta must be NULL. */
6145 module_delta = READ_ONCE(tr->module_delta);
6146 if (!module_delta || !tscratch->nr_entries ||
6147 tscratch->entries[0].mod_addr > addr) {
6148 raddr = addr + tr->text_delta;
6149 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6150 is_kernel_rodata(raddr) ? raddr : addr;
6151 }
6152
6153 /* Note that entries must be sorted. */
6154 nr_entries = tscratch->nr_entries;
6155 if (nr_entries == 1 ||
6156 tscratch->entries[nr_entries - 1].mod_addr < addr)
6157 idx = nr_entries - 1;
6158 else {
6159 entry = __inline_bsearch((void *)addr,
6160 tscratch->entries,
6161 nr_entries - 1,
6162 sizeof(tscratch->entries[0]),
6163 cmp_mod_entry);
6164 if (entry)
6165 idx = entry - tscratch->entries;
6166 }
6167
6168 return addr + module_delta->delta[idx];
6169 }
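/*
 * Illustrative numbers: if module A sat at 0xffffffffc0000000 in the
 * previous boot and module_delta->delta[idx] is 0x200000, a saved
 * address of 0xffffffffc0001234 is reported as 0xffffffffc0201234,
 * i.e. inside module A as it is loaded now.  Core kernel addresses are
 * shifted by tr->text_delta instead.
 */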
6170
6171 #ifdef CONFIG_MODULES
6172 static int save_mod(struct module *mod, void *data)
6173 {
6174 struct trace_array *tr = data;
6175 struct trace_scratch *tscratch;
6176 struct trace_mod_entry *entry;
6177 unsigned int size;
6178
6179 tscratch = tr->scratch;
6180 if (!tscratch)
6181 return -1;
6182 size = tr->scratch_size;
6183
6184 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6185 return -1;
6186
6187 entry = &tscratch->entries[tscratch->nr_entries];
6188
6189 tscratch->nr_entries++;
6190
6191 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6192 strscpy(entry->mod_name, mod->name);
6193
6194 return 0;
6195 }
6196 #else
6197 static int save_mod(struct module *mod, void *data)
6198 {
6199 return 0;
6200 }
6201 #endif
6202
6203 static void update_last_data(struct trace_array *tr)
6204 {
6205 struct trace_module_delta *module_delta;
6206 struct trace_scratch *tscratch;
6207
6208 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6209 return;
6210
6211 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6212 return;
6213
6214 /* Only clear and update the buffer if it has previous boot data. */
6215 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6216
6217 /* Reset the module list and reload them */
6218 if (tr->scratch) {
6219 struct trace_scratch *tscratch = tr->scratch;
6220
6221 tscratch->clock_id = tr->clock_id;
6222 memset(tscratch->entries, 0,
6223 flex_array_size(tscratch, entries, tscratch->nr_entries));
6224 tscratch->nr_entries = 0;
6225
6226 guard(mutex)(&scratch_mutex);
6227 module_for_each_mod(save_mod, tr);
6228 }
6229
6230 /*
6231 * Need to clear all CPU buffers as there cannot be events
6232 * from the previous boot mixed with events from this boot
6233 * as that will cause a confusing trace. Need to clear all
6234 * CPU buffers, even for those that may currently be offline.
6235 */
6236 tracing_reset_all_cpus(&tr->array_buffer);
6237
6238 /* Using current data now */
6239 tr->text_delta = 0;
6240
6241 if (!tr->scratch)
6242 return;
6243
6244 tscratch = tr->scratch;
6245 module_delta = READ_ONCE(tr->module_delta);
6246 WRITE_ONCE(tr->module_delta, NULL);
6247 kfree_rcu(module_delta, rcu);
6248
6249 /* Set the persistent ring buffer meta data to this address */
6250 tscratch->text_addr = (unsigned long)_text;
6251 }
6252
6253 /**
6254 * tracing_update_buffers - used by tracing facility to expand ring buffers
6255 * @tr: The tracing instance
6256 *
6257 * To save on memory when the tracing is never used on a system with it
6258 * configured in. The ring buffers are set to a minimum size. But once
6259 * a user starts to use the tracing facility, then they need to grow
6260 * to their default size.
6261 *
6262 * This function is to be called when a tracer is about to be used.
6263 */
6264 int tracing_update_buffers(struct trace_array *tr)
6265 {
6266 int ret = 0;
6267
6268 guard(mutex)(&trace_types_lock);
6269
6270 update_last_data(tr);
6271
6272 if (!tr->ring_buffer_expanded)
6273 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6274 RING_BUFFER_ALL_CPUS);
6275 return ret;
6276 }
6277
6278 /*
6279 * Used to clear out the tracer before deletion of an instance.
6280 * Must have trace_types_lock held.
6281 */
6282 static void tracing_set_nop(struct trace_array *tr)
6283 {
6284 if (tr->current_trace == &nop_trace)
6285 return;
6286
6287 tr->current_trace->enabled--;
6288
6289 if (tr->current_trace->reset)
6290 tr->current_trace->reset(tr);
6291
6292 tr->current_trace = &nop_trace;
6293 tr->current_trace_flags = nop_trace.flags;
6294 }
6295
6296 static bool tracer_options_updated;
6297
6298 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6299 {
6300 struct tracer *trace = NULL;
6301 struct tracers *t;
6302 #ifdef CONFIG_TRACER_MAX_TRACE
6303 bool had_max_tr;
6304 #endif
6305 int ret;
6306
6307 guard(mutex)(&trace_types_lock);
6308
6309 update_last_data(tr);
6310
6311 if (!tr->ring_buffer_expanded) {
6312 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6313 RING_BUFFER_ALL_CPUS);
6314 if (ret < 0)
6315 return ret;
6316 ret = 0;
6317 }
6318
6319 list_for_each_entry(t, &tr->tracers, list) {
6320 if (strcmp(t->tracer->name, buf) == 0) {
6321 trace = t->tracer;
6322 break;
6323 }
6324 }
6325 if (!trace)
6326 return -EINVAL;
6327
6328 if (trace == tr->current_trace)
6329 return 0;
6330
6331 #ifdef CONFIG_TRACER_SNAPSHOT
6332 if (trace->use_max_tr) {
6333 local_irq_disable();
6334 arch_spin_lock(&tr->max_lock);
6335 ret = tr->cond_snapshot ? -EBUSY : 0;
6336 arch_spin_unlock(&tr->max_lock);
6337 local_irq_enable();
6338 if (ret)
6339 return ret;
6340 }
6341 #endif
6342 /* Some tracers won't work on kernel command line */
6343 if (system_state < SYSTEM_RUNNING && trace->noboot) {
6344 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6345 trace->name);
6346 return -EINVAL;
6347 }
6348
6349 /* Some tracers are only allowed for the top level buffer */
6350 if (!trace_ok_for_array(trace, tr))
6351 return -EINVAL;
6352
6353 /* If trace pipe files are being read, we can't change the tracer */
6354 if (tr->trace_ref)
6355 return -EBUSY;
6356
6357 trace_branch_disable();
6358
6359 tr->current_trace->enabled--;
6360
6361 if (tr->current_trace->reset)
6362 tr->current_trace->reset(tr);
6363
6364 #ifdef CONFIG_TRACER_MAX_TRACE
6365 had_max_tr = tr->current_trace->use_max_tr;
6366
6367 /* Current trace needs to be nop_trace before synchronize_rcu */
6368 tr->current_trace = &nop_trace;
6369 tr->current_trace_flags = nop_trace.flags;
6370
6371 if (had_max_tr && !trace->use_max_tr) {
6372 /*
6373 * We need to make sure that the update_max_tr sees that
6374 * current_trace changed to nop_trace to keep it from
6375 * swapping the buffers after we resize it.
6376 * The update_max_tr is called with interrupts disabled
6377 * so a synchronize_rcu() is sufficient.
6378 */
6379 synchronize_rcu();
6380 free_snapshot(tr);
6381 tracing_disarm_snapshot(tr);
6382 }
6383
6384 if (!had_max_tr && trace->use_max_tr) {
6385 ret = tracing_arm_snapshot_locked(tr);
6386 if (ret)
6387 return ret;
6388 }
6389 #else
6390 tr->current_trace = &nop_trace;
6391 #endif
6392
6393 tr->current_trace_flags = t->flags ? : t->tracer->flags;
6394
6395 if (trace->init) {
6396 ret = tracer_init(trace, tr);
6397 if (ret) {
6398 #ifdef CONFIG_TRACER_MAX_TRACE
6399 if (trace->use_max_tr)
6400 tracing_disarm_snapshot(tr);
6401 #endif
6402 tr->current_trace_flags = nop_trace.flags;
6403 return ret;
6404 }
6405 }
6406
6407 tr->current_trace = trace;
6408 tr->current_trace->enabled++;
6409 trace_branch_enable(tr);
6410
6411 return 0;
6412 }
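/*
 * Typically reached by writing a tracer name to current_tracer, e.g.:
 *
 *   echo function_graph > /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer
 *
 * The name must be one registered for this instance (as listed in
 * available_tracers), otherwise -EINVAL is returned.
 */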
6413
6414 static ssize_t
6415 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6416 size_t cnt, loff_t *ppos)
6417 {
6418 struct trace_array *tr = filp->private_data;
6419 char buf[MAX_TRACER_SIZE+1];
6420 char *name;
6421 size_t ret;
6422 int err;
6423
6424 ret = cnt;
6425
6426 if (cnt > MAX_TRACER_SIZE)
6427 cnt = MAX_TRACER_SIZE;
6428
6429 if (copy_from_user(buf, ubuf, cnt))
6430 return -EFAULT;
6431
6432 buf[cnt] = 0;
6433
6434 name = strim(buf);
6435
6436 err = tracing_set_tracer(tr, name);
6437 if (err)
6438 return err;
6439
6440 *ppos += ret;
6441
6442 return ret;
6443 }
6444
6445 static ssize_t
6446 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6447 size_t cnt, loff_t *ppos)
6448 {
6449 char buf[64];
6450 int r;
6451
6452 r = snprintf(buf, sizeof(buf), "%ld\n",
6453 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6454 if (r > sizeof(buf))
6455 r = sizeof(buf);
6456 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6457 }
6458
6459 static ssize_t
6460 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6461 size_t cnt, loff_t *ppos)
6462 {
6463 unsigned long val;
6464 int ret;
6465
6466 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6467 if (ret)
6468 return ret;
6469
6470 *ptr = val * 1000;
6471
6472 return cnt;
6473 }
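/*
 * Note the unit conversion: these files expose microseconds while the
 * backing variables hold nanoseconds.  For example:
 *
 *   echo 500 > tracing_thresh
 *
 * stores 500000 in tracing_thresh, and reading the file prints "500".
 */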
6474
6475 static ssize_t
6476 tracing_thresh_read(struct file *filp, char __user *ubuf,
6477 size_t cnt, loff_t *ppos)
6478 {
6479 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6480 }
6481
6482 static ssize_t
6483 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6484 size_t cnt, loff_t *ppos)
6485 {
6486 struct trace_array *tr = filp->private_data;
6487 int ret;
6488
6489 guard(mutex)(&trace_types_lock);
6490 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6491 if (ret < 0)
6492 return ret;
6493
6494 if (tr->current_trace->update_thresh) {
6495 ret = tr->current_trace->update_thresh(tr);
6496 if (ret < 0)
6497 return ret;
6498 }
6499
6500 return cnt;
6501 }
6502
6503 #ifdef CONFIG_TRACER_MAX_TRACE
6504
6505 static ssize_t
6506 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6507 size_t cnt, loff_t *ppos)
6508 {
6509 struct trace_array *tr = filp->private_data;
6510
6511 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6512 }
6513
6514 static ssize_t
6515 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6516 size_t cnt, loff_t *ppos)
6517 {
6518 struct trace_array *tr = filp->private_data;
6519
6520 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6521 }
6522
6523 #endif
6524
6525 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6526 {
6527 if (cpu == RING_BUFFER_ALL_CPUS) {
6528 if (cpumask_empty(tr->pipe_cpumask)) {
6529 cpumask_setall(tr->pipe_cpumask);
6530 return 0;
6531 }
6532 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6533 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6534 return 0;
6535 }
6536 return -EBUSY;
6537 }
6538
6539 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6540 {
6541 if (cpu == RING_BUFFER_ALL_CPUS) {
6542 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6543 cpumask_clear(tr->pipe_cpumask);
6544 } else {
6545 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6546 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6547 }
6548 }
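/*
 * The pipe_cpumask enforces a single consumer per CPU: opening
 * trace_pipe (RING_BUFFER_ALL_CPUS) requires every bit to be clear and
 * then takes them all, while opening per_cpu/cpuN/trace_pipe only takes
 * bit N.  A conflicting second open fails with -EBUSY.
 */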
6549
6550 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6551 {
6552 struct trace_array *tr = inode->i_private;
6553 struct trace_iterator *iter;
6554 int cpu;
6555 int ret;
6556
6557 ret = tracing_check_open_get_tr(tr);
6558 if (ret)
6559 return ret;
6560
6561 guard(mutex)(&trace_types_lock);
6562 cpu = tracing_get_cpu(inode);
6563 ret = open_pipe_on_cpu(tr, cpu);
6564 if (ret)
6565 goto fail_pipe_on_cpu;
6566
6567 /* create a buffer to store the information to pass to userspace */
6568 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6569 if (!iter) {
6570 ret = -ENOMEM;
6571 goto fail_alloc_iter;
6572 }
6573
6574 trace_seq_init(&iter->seq);
6575 iter->trace = tr->current_trace;
6576
6577 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6578 ret = -ENOMEM;
6579 goto fail;
6580 }
6581
6582 /* trace pipe does not show start of buffer */
6583 cpumask_setall(iter->started);
6584
6585 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
6586 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6587
6588 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6589 if (trace_clocks[tr->clock_id].in_ns)
6590 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6591
6592 iter->tr = tr;
6593 iter->array_buffer = &tr->array_buffer;
6594 iter->cpu_file = cpu;
6595 mutex_init(&iter->mutex);
6596 filp->private_data = iter;
6597
6598 if (iter->trace->pipe_open)
6599 iter->trace->pipe_open(iter);
6600
6601 nonseekable_open(inode, filp);
6602
6603 tr->trace_ref++;
6604
6605 return ret;
6606
6607 fail:
6608 kfree(iter);
6609 fail_alloc_iter:
6610 close_pipe_on_cpu(tr, cpu);
6611 fail_pipe_on_cpu:
6612 __trace_array_put(tr);
6613 return ret;
6614 }
6615
6616 static int tracing_release_pipe(struct inode *inode, struct file *file)
6617 {
6618 struct trace_iterator *iter = file->private_data;
6619 struct trace_array *tr = inode->i_private;
6620
6621 scoped_guard(mutex, &trace_types_lock) {
6622 tr->trace_ref--;
6623
6624 if (iter->trace->pipe_close)
6625 iter->trace->pipe_close(iter);
6626 close_pipe_on_cpu(tr, iter->cpu_file);
6627 }
6628
6629 free_trace_iter_content(iter);
6630 kfree(iter);
6631
6632 trace_array_put(tr);
6633
6634 return 0;
6635 }
6636
6637 static __poll_t
6638 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6639 {
6640 struct trace_array *tr = iter->tr;
6641
6642 /* Iterators are static, they should be filled or empty */
6643 if (trace_buffer_iter(iter, iter->cpu_file))
6644 return EPOLLIN | EPOLLRDNORM;
6645
6646 if (tr->trace_flags & TRACE_ITER(BLOCK))
6647 /*
6648 * Always select as readable when in blocking mode
6649 */
6650 return EPOLLIN | EPOLLRDNORM;
6651 else
6652 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6653 filp, poll_table, iter->tr->buffer_percent);
6654 }
6655
6656 static __poll_t
6657 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6658 {
6659 struct trace_iterator *iter = filp->private_data;
6660
6661 return trace_poll(iter, filp, poll_table);
6662 }
6663
6664 /* Must be called with iter->mutex held. */
6665 static int tracing_wait_pipe(struct file *filp)
6666 {
6667 struct trace_iterator *iter = filp->private_data;
6668 int ret;
6669
6670 while (trace_empty(iter)) {
6671
6672 if ((filp->f_flags & O_NONBLOCK)) {
6673 return -EAGAIN;
6674 }
6675
6676 /*
6677 * We block until we read something and tracing is disabled.
6678 * We still block if tracing is disabled, but we have never
6679 * read anything. This allows a user to cat this file, and
6680 * then enable tracing. But after we have read something,
6681 * we give an EOF when tracing is again disabled.
6682 *
6683 * iter->pos will be 0 if we haven't read anything.
6684 */
6685 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6686 break;
6687
6688 mutex_unlock(&iter->mutex);
6689
6690 ret = wait_on_pipe(iter, 0);
6691
6692 mutex_lock(&iter->mutex);
6693
6694 if (ret)
6695 return ret;
6696 }
6697
6698 return 1;
6699 }
6700
6701 static bool update_last_data_if_empty(struct trace_array *tr)
6702 {
6703 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6704 return false;
6705
6706 if (!ring_buffer_empty(tr->array_buffer.buffer))
6707 return false;
6708
6709 /*
6710 * If the buffer contains the last boot data and all per-cpu
6711 * buffers are empty, reset it from the kernel side.
6712 */
6713 update_last_data(tr);
6714 return true;
6715 }
6716
6717 /*
6718 * Consumer reader.
6719 */
6720 static ssize_t
6721 tracing_read_pipe(struct file *filp, char __user *ubuf,
6722 size_t cnt, loff_t *ppos)
6723 {
6724 struct trace_iterator *iter = filp->private_data;
6725 ssize_t sret;
6726
6727 /*
6728 * Avoid more than one consumer on a single file descriptor.
6729 * This is just a matter of trace coherency; the ring buffer itself
6730 * is protected.
6731 */
6732 guard(mutex)(&iter->mutex);
6733
6734 /* return any leftover data */
6735 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6736 if (sret != -EBUSY)
6737 return sret;
6738
6739 trace_seq_init(&iter->seq);
6740
6741 if (iter->trace->read) {
6742 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6743 if (sret)
6744 return sret;
6745 }
6746
6747 waitagain:
6748 if (update_last_data_if_empty(iter->tr))
6749 return 0;
6750
6751 sret = tracing_wait_pipe(filp);
6752 if (sret <= 0)
6753 return sret;
6754
6755 /* stop when tracing is finished */
6756 if (trace_empty(iter))
6757 return 0;
6758
6759 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6760 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6761
6762 /* reset all but tr, trace, and overruns */
6763 trace_iterator_reset(iter);
6764 cpumask_clear(iter->started);
6765 trace_seq_init(&iter->seq);
6766
6767 trace_event_read_lock();
6768 trace_access_lock(iter->cpu_file);
6769 while (trace_find_next_entry_inc(iter) != NULL) {
6770 enum print_line_t ret;
6771 int save_len = iter->seq.seq.len;
6772
6773 ret = print_trace_line(iter);
6774 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6775 /*
6776 * If one print_trace_line() fills the entire trace_seq in one shot,
6777 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6778 * In this case, we need to consume it, otherwise the loop will peek
6779 * this event next time, resulting in an infinite loop.
6780 */
6781 if (save_len == 0) {
6782 iter->seq.full = 0;
6783 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6784 trace_consume(iter);
6785 break;
6786 }
6787
6788 /* In other cases, don't print partial lines */
6789 iter->seq.seq.len = save_len;
6790 break;
6791 }
6792 if (ret != TRACE_TYPE_NO_CONSUME)
6793 trace_consume(iter);
6794
6795 if (trace_seq_used(&iter->seq) >= cnt)
6796 break;
6797
6798 /*
6799 * Setting the full flag means we reached the trace_seq buffer
6800 * size and we should have left via the partial output condition
6801 * above. One of the trace_seq_* functions is not used properly.
6802 */
6803 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6804 iter->ent->type);
6805 }
6806 trace_access_unlock(iter->cpu_file);
6807 trace_event_read_unlock();
6808
6809 /* Now copy what we have to the user */
6810 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6811 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6812 trace_seq_init(&iter->seq);
6813
6814 /*
6815 * If there was nothing to send to user, in spite of consuming trace
6816 * entries, go back to wait for more entries.
6817 */
6818 if (sret == -EBUSY)
6819 goto waitagain;
6820
6821 return sret;
6822 }
6823
6824 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6825 unsigned int idx)
6826 {
6827 __free_page(spd->pages[idx]);
6828 }
6829
6830 static size_t
6831 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6832 {
6833 size_t count;
6834 int save_len;
6835 int ret;
6836
6837 /* Seq buffer is page-sized, exactly what we need. */
6838 for (;;) {
6839 save_len = iter->seq.seq.len;
6840 ret = print_trace_line(iter);
6841
6842 if (trace_seq_has_overflowed(&iter->seq)) {
6843 iter->seq.seq.len = save_len;
6844 break;
6845 }
6846
6847 /*
6848 * This should not be hit, because it should only
6849 * be set if the iter->seq overflowed. But check it
6850 * anyway to be safe.
6851 */
6852 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6853 iter->seq.seq.len = save_len;
6854 break;
6855 }
6856
6857 count = trace_seq_used(&iter->seq) - save_len;
6858 if (rem < count) {
6859 rem = 0;
6860 iter->seq.seq.len = save_len;
6861 break;
6862 }
6863
6864 if (ret != TRACE_TYPE_NO_CONSUME)
6865 trace_consume(iter);
6866 rem -= count;
6867 if (!trace_find_next_entry_inc(iter)) {
6868 rem = 0;
6869 iter->ent = NULL;
6870 break;
6871 }
6872 }
6873
6874 return rem;
6875 }
6876
6877 static ssize_t tracing_splice_read_pipe(struct file *filp,
6878 loff_t *ppos,
6879 struct pipe_inode_info *pipe,
6880 size_t len,
6881 unsigned int flags)
6882 {
6883 struct page *pages_def[PIPE_DEF_BUFFERS];
6884 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6885 struct trace_iterator *iter = filp->private_data;
6886 struct splice_pipe_desc spd = {
6887 .pages = pages_def,
6888 .partial = partial_def,
6889 .nr_pages = 0, /* This gets updated below. */
6890 .nr_pages_max = PIPE_DEF_BUFFERS,
6891 .ops = &default_pipe_buf_ops,
6892 .spd_release = tracing_spd_release_pipe,
6893 };
6894 ssize_t ret;
6895 size_t rem;
6896 unsigned int i;
6897
6898 if (splice_grow_spd(pipe, &spd))
6899 return -ENOMEM;
6900
6901 mutex_lock(&iter->mutex);
6902
6903 if (iter->trace->splice_read) {
6904 ret = iter->trace->splice_read(iter, filp,
6905 ppos, pipe, len, flags);
6906 if (ret)
6907 goto out_err;
6908 }
6909
6910 ret = tracing_wait_pipe(filp);
6911 if (ret <= 0)
6912 goto out_err;
6913
6914 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6915 ret = -EFAULT;
6916 goto out_err;
6917 }
6918
6919 trace_event_read_lock();
6920 trace_access_lock(iter->cpu_file);
6921
6922 /* Fill as many pages as possible. */
6923 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6924 spd.pages[i] = alloc_page(GFP_KERNEL);
6925 if (!spd.pages[i])
6926 break;
6927
6928 rem = tracing_fill_pipe_page(rem, iter);
6929
6930 /* Copy the data into the page, so we can start over. */
6931 ret = trace_seq_to_buffer(&iter->seq,
6932 page_address(spd.pages[i]),
6933 min((size_t)trace_seq_used(&iter->seq),
6934 (size_t)PAGE_SIZE));
6935 if (ret < 0) {
6936 __free_page(spd.pages[i]);
6937 break;
6938 }
6939 spd.partial[i].offset = 0;
6940 spd.partial[i].len = ret;
6941
6942 trace_seq_init(&iter->seq);
6943 }
6944
6945 trace_access_unlock(iter->cpu_file);
6946 trace_event_read_unlock();
6947 mutex_unlock(&iter->mutex);
6948
6949 spd.nr_pages = i;
6950
6951 if (i)
6952 ret = splice_to_pipe(pipe, &spd);
6953 else
6954 ret = 0;
6955 out:
6956 splice_shrink_spd(&spd);
6957 return ret;
6958
6959 out_err:
6960 mutex_unlock(&iter->mutex);
6961 goto out;
6962 }
6963
6964 static ssize_t
6965 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
6966 size_t cnt, loff_t *ppos)
6967 {
6968 struct inode *inode = file_inode(filp);
6969 struct trace_array *tr = inode->i_private;
6970 char buf[64];
6971 int r;
6972
6973 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
6974
6975 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6976 }
6977
6978 static ssize_t
6979 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
6980 size_t cnt, loff_t *ppos)
6981 {
6982 struct inode *inode = file_inode(filp);
6983 struct trace_array *tr = inode->i_private;
6984 unsigned long val;
6985 int ret;
6986
6987 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6988 if (ret)
6989 return ret;
6990
6991 if (val > SYSCALL_FAULT_USER_MAX)
6992 val = SYSCALL_FAULT_USER_MAX;
6993
6994 tr->syscall_buf_sz = val;
6995
6996 *ppos += cnt;
6997
6998 return cnt;
6999 }
7000
7001 static ssize_t
7002 tracing_entries_read(struct file *filp, char __user *ubuf,
7003 size_t cnt, loff_t *ppos)
7004 {
7005 struct inode *inode = file_inode(filp);
7006 struct trace_array *tr = inode->i_private;
7007 int cpu = tracing_get_cpu(inode);
7008 char buf[64];
7009 int r = 0;
7010 ssize_t ret;
7011
7012 mutex_lock(&trace_types_lock);
7013
7014 if (cpu == RING_BUFFER_ALL_CPUS) {
7015 int cpu, buf_size_same;
7016 unsigned long size;
7017
7018 size = 0;
7019 buf_size_same = 1;
7020 /* check if all cpu sizes are same */
7021 for_each_tracing_cpu(cpu) {
7022 /* fill in the size from first enabled cpu */
7023 if (size == 0)
7024 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7025 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7026 buf_size_same = 0;
7027 break;
7028 }
7029 }
7030
7031 if (buf_size_same) {
7032 if (!tr->ring_buffer_expanded)
7033 r = sprintf(buf, "%lu (expanded: %lu)\n",
7034 size >> 10,
7035 trace_buf_size >> 10);
7036 else
7037 r = sprintf(buf, "%lu\n", size >> 10);
7038 } else
7039 r = sprintf(buf, "X\n");
7040 } else
7041 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7042
7043 mutex_unlock(&trace_types_lock);
7044
7045 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7046 return ret;
7047 }
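/*
 * Example buffer_size_kb output (sizes are illustrative): "1408" when
 * all per-CPU buffers are equal and already expanded, "7 (expanded:
 * 1408)" before the first expansion, and "X" when the per-CPU sizes
 * differ; the per_cpu/cpuN/buffer_size_kb files still show each size.
 */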
7048
7049 static ssize_t
7050 tracing_entries_write(struct file *filp, const char __user *ubuf,
7051 size_t cnt, loff_t *ppos)
7052 {
7053 struct inode *inode = file_inode(filp);
7054 struct trace_array *tr = inode->i_private;
7055 unsigned long val;
7056 int ret;
7057
7058 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7059 if (ret)
7060 return ret;
7061
7062 /* must have at least 1 entry */
7063 if (!val)
7064 return -EINVAL;
7065
7066 /* value is in KB */
7067 val <<= 10;
7068 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7069 if (ret < 0)
7070 return ret;
7071
7072 *ppos += cnt;
7073
7074 return cnt;
7075 }
7076
7077 static ssize_t
7078 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7079 size_t cnt, loff_t *ppos)
7080 {
7081 struct trace_array *tr = filp->private_data;
7082 char buf[64];
7083 int r, cpu;
7084 unsigned long size = 0, expanded_size = 0;
7085
7086 mutex_lock(&trace_types_lock);
7087 for_each_tracing_cpu(cpu) {
7088 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7089 if (!tr->ring_buffer_expanded)
7090 expanded_size += trace_buf_size >> 10;
7091 }
7092 if (tr->ring_buffer_expanded)
7093 r = sprintf(buf, "%lu\n", size);
7094 else
7095 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7096 mutex_unlock(&trace_types_lock);
7097
7098 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7099 }
7100
7101 #define LAST_BOOT_HEADER ((void *)1)
7102
7103 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7104 {
7105 struct trace_array *tr = m->private;
7106 struct trace_scratch *tscratch = tr->scratch;
7107 unsigned int index = *pos;
7108
7109 (*pos)++;
7110
7111 if (*pos == 1)
7112 return LAST_BOOT_HEADER;
7113
7114 /* Only show offsets of the last boot data */
7115 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7116 return NULL;
7117
7118 /* *pos 0 is for the header, 1 is for the first module */
7119 index--;
7120
7121 if (index >= tscratch->nr_entries)
7122 return NULL;
7123
7124 return &tscratch->entries[index];
7125 }
7126
7127 static void *l_start(struct seq_file *m, loff_t *pos)
7128 {
7129 mutex_lock(&scratch_mutex);
7130
7131 return l_next(m, NULL, pos);
7132 }
7133
7134 static void l_stop(struct seq_file *m, void *p)
7135 {
7136 mutex_unlock(&scratch_mutex);
7137 }
7138
7139 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7140 {
7141 struct trace_scratch *tscratch = tr->scratch;
7142
7143 /*
7144 * Do not leak KASLR address. This only shows the KASLR address of
7145 * the last boot. When the ring buffer is started, the LAST_BOOT
7146 * flag gets cleared, and this should only report "current".
7147 * Otherwise it shows the KASLR address from the previous boot which
7148 * should not be the same as the current boot.
7149 */
7150 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7151 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7152 else
7153 seq_puts(m, "# Current\n");
7154 }
7155
7156 static int l_show(struct seq_file *m, void *v)
7157 {
7158 struct trace_array *tr = m->private;
7159 struct trace_mod_entry *entry = v;
7160
7161 if (v == LAST_BOOT_HEADER) {
7162 show_last_boot_header(m, tr);
7163 return 0;
7164 }
7165
7166 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7167 return 0;
7168 }
7169
7170 static const struct seq_operations last_boot_seq_ops = {
7171 .start = l_start,
7172 .next = l_next,
7173 .stop = l_stop,
7174 .show = l_show,
7175 };
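/*
 * The resulting last_boot_info output is a header line followed by one
 * line per module recorded in the scratch area, e.g.:
 *
 *   <kernel text address>   [kernel]
 *   <module text address>   <module name>
 *
 * Once the buffer no longer holds last-boot data, only "# Current" is
 * printed and no module lines follow.
 */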
7176
7177 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7178 {
7179 struct trace_array *tr = inode->i_private;
7180 struct seq_file *m;
7181 int ret;
7182
7183 ret = tracing_check_open_get_tr(tr);
7184 if (ret)
7185 return ret;
7186
7187 ret = seq_open(file, &last_boot_seq_ops);
7188 if (ret) {
7189 trace_array_put(tr);
7190 return ret;
7191 }
7192
7193 m = file->private_data;
7194 m->private = tr;
7195
7196 return 0;
7197 }
7198
7199 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7200 {
7201 struct trace_array *tr = inode->i_private;
7202 int cpu = tracing_get_cpu(inode);
7203 int ret;
7204
7205 ret = tracing_check_open_get_tr(tr);
7206 if (ret)
7207 return ret;
7208
7209 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7210 if (ret < 0)
7211 __trace_array_put(tr);
7212 return ret;
7213 }
7214
7215 static ssize_t
7216 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7217 size_t cnt, loff_t *ppos)
7218 {
7219 /*
7220 * There is no need to read what the user has written; this function
7221 * just makes sure that there is no error when "echo" is used.
7222 */
7223
7224 *ppos += cnt;
7225
7226 return cnt;
7227 }
7228
7229 static int
7230 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7231 {
7232 struct trace_array *tr = inode->i_private;
7233
7234 /* disable tracing ? */
7235 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
7236 tracer_tracing_off(tr);
7237 /* resize the ring buffer to 0 */
7238 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7239
7240 trace_array_put(tr);
7241
7242 return 0;
7243 }
7244
7245 #define TRACE_MARKER_MAX_SIZE 4096
7246
7247 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
7248 size_t cnt, unsigned long ip)
7249 {
7250 struct ring_buffer_event *event;
7251 enum event_trigger_type tt = ETT_NONE;
7252 struct trace_buffer *buffer;
7253 struct print_entry *entry;
7254 int meta_size;
7255 ssize_t written;
7256 size_t size;
7257
7258 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
7259 again:
7260 size = cnt + meta_size;
7261
7262 buffer = tr->array_buffer.buffer;
7263 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7264 tracing_gen_ctx());
7265 if (unlikely(!event)) {
7266 /*
7267 * If the size was greater than what was allowed, then
7268 * make it smaller and try again.
7269 */
7270 if (size > ring_buffer_max_event_size(buffer)) {
7271 cnt = ring_buffer_max_event_size(buffer) - meta_size;
7272 /* The above should only happen once */
7273 if (WARN_ON_ONCE(cnt + meta_size == size))
7274 return -EBADF;
7275 goto again;
7276 }
7277
7278 /* Ring buffer disabled, return as if not open for write */
7279 return -EBADF;
7280 }
7281
7282 entry = ring_buffer_event_data(event);
7283 entry->ip = ip;
7284 memcpy(&entry->buf, buf, cnt);
7285 written = cnt;
7286
7287 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7288 /* do not add \n before testing triggers, but add \0 */
7289 entry->buf[cnt] = '\0';
7290 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7291 }
7292
7293 if (entry->buf[cnt - 1] != '\n') {
7294 entry->buf[cnt] = '\n';
7295 entry->buf[cnt + 1] = '\0';
7296 } else
7297 entry->buf[cnt] = '\0';
7298
7299 if (static_branch_unlikely(&trace_marker_exports_enabled))
7300 ftrace_exports(event, TRACE_EXPORT_MARKER);
7301 __buffer_unlock_commit(buffer, event);
7302
7303 if (tt)
7304 event_triggers_post_call(tr->trace_marker_file, tt);
7305
7306 return written;
7307 }
7308
7309 struct trace_user_buf {
7310 char *buf;
7311 };
7312
7313 static DEFINE_MUTEX(trace_user_buffer_mutex);
7314 static struct trace_user_buf_info *trace_user_buffer;
7315
7316 /**
7317 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
7318 * @tinfo: The descriptor to free up
7319 *
7320 * Frees any data allocated in the trace info descriptor.
7321 */
7322 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
7323 {
7324 char *buf;
7325 int cpu;
7326
7327 if (!tinfo || !tinfo->tbuf)
7328 return;
7329
7330 for_each_possible_cpu(cpu) {
7331 buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7332 kfree(buf);
7333 }
7334 free_percpu(tinfo->tbuf);
7335 }
7336
7337 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
7338 {
7339 char *buf;
7340 int cpu;
7341
7342 lockdep_assert_held(&trace_user_buffer_mutex);
7343
7344 tinfo->tbuf = alloc_percpu(struct trace_user_buf);
7345 if (!tinfo->tbuf)
7346 return -ENOMEM;
7347
7348 tinfo->ref = 1;
7349 tinfo->size = size;
7350
7351 /* Clear each buffer in case of error */
7352 for_each_possible_cpu(cpu) {
7353 per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
7354 }
7355
7356 for_each_possible_cpu(cpu) {
7357 buf = kmalloc_node(size, GFP_KERNEL,
7358 cpu_to_node(cpu));
7359 if (!buf)
7360 return -ENOMEM;
7361 per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
7362 }
7363
7364 return 0;
7365 }
7366
7367 /* For internal use. Free and reinitialize */
7368 static void user_buffer_free(struct trace_user_buf_info **tinfo)
7369 {
7370 lockdep_assert_held(&trace_user_buffer_mutex);
7371
7372 trace_user_fault_destroy(*tinfo);
7373 kfree(*tinfo);
7374 *tinfo = NULL;
7375 }
7376
7377 /* For internal use. Initialize and allocate */
7378 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
7379 {
7380 bool alloc = false;
7381 int ret;
7382
7383 lockdep_assert_held(&trace_user_buffer_mutex);
7384
7385 if (!*tinfo) {
7386 alloc = true;
7387 *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL);
7388 if (!*tinfo)
7389 return -ENOMEM;
7390 }
7391
7392 ret = user_fault_buffer_enable(*tinfo, size);
7393 if (ret < 0 && alloc)
7394 user_buffer_free(tinfo);
7395
7396 return ret;
7397 }
7398
7399 /* For internal use, dereference and free if necessary */
7400 static void user_buffer_put(struct trace_user_buf_info **tinfo)
7401 {
7402 guard(mutex)(&trace_user_buffer_mutex);
7403
7404 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
7405 return;
7406
7407 if (--(*tinfo)->ref)
7408 return;
7409
7410 user_buffer_free(tinfo);
7411 }
7412
7413 /**
7414 * trace_user_fault_init - Allocate or reference a per CPU buffer
7415 * @tinfo: A pointer to the trace buffer descriptor
7416 * @size: The size to allocate each per CPU buffer
7417 *
7418 * Create a per CPU buffer that can be used to copy from user space
7419 * in a task context. When calling trace_user_fault_read(), preemption
7420 * must be disabled; it will re-enable preemption while copying the user
7421 * space data into the buffer. If any context switches occur during the
7422 * copy, it will retry until the copy completes without a context switch,
7423 * guaranteeing the buffer is still valid.
7424 *
7425 * Returns 0 on success, negative on failure.
7426 */
7427 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
7428 {
7429 int ret;
7430
7431 if (!tinfo)
7432 return -EINVAL;
7433
7434 guard(mutex)(&trace_user_buffer_mutex);
7435
7436 ret = user_buffer_init(&tinfo, size);
7437 if (ret < 0)
7438 trace_user_fault_destroy(tinfo);
7439
7440 return ret;
7441 }
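/*
 * Illustrative usage sketch, not part of the original file; the
 * "example_" names are hypothetical. A caller initializes the descriptor
 * once and then, with preemption disabled, reads user memory through the
 * per CPU buffer:
 *
 *	static struct trace_user_buf_info example_tinfo;
 *
 *	if (trace_user_fault_init(&example_tinfo, PAGE_SIZE))
 *		return -ENOMEM;
 *	...
 *	guard(preempt_notrace)();
 *	buf = trace_user_fault_read(&example_tinfo, uptr, len, NULL, NULL);
 *	if (!buf)
 *		return -EFAULT;
 *	// buf stays valid only while preemption remains disabled
 *	...
 *	trace_user_fault_put(&example_tinfo);
 */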
7442
7443 /**
7444 * trace_user_fault_get - up the ref count for the user buffer
7445 * @tinfo: A pointer to the trace buffer descriptor
7446 *
7447 * Ups the ref count of the trace buffer.
7448 *
7449 * Returns the new ref count.
7450 */
7451 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
7452 {
7453 if (!tinfo)
7454 return -1;
7455
7456 guard(mutex)(&trace_user_buffer_mutex);
7457
7458 tinfo->ref++;
7459 return tinfo->ref;
7460 }
7461
7462 /**
7463 * trace_user_fault_put - dereference a per cpu trace buffer
7464 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
7465 *
7466 * Decrement the ref count of @tinfo.
7467 *
7468 * Returns the new refcount (negative on error).
7469 */
7470 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
7471 {
7472 guard(mutex)(&trace_user_buffer_mutex);
7473
7474 if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
7475 return -1;
7476
7477 --tinfo->ref;
7478 return tinfo->ref;
7479 }
7480
7481 /**
7482 * trace_user_fault_read - Read user space into a per CPU buffer
7483 * @tinfo: The @tinfo allocated by trace_user_fault_init()
7484 * @ptr: The user space pointer to read
7485 * @size: The size of user space to read.
7486 * @copy_func: Optional function to use to copy from user space
7487 * @data: Data to pass to copy_func if it was supplied
7488 *
7489 * Preemption must be disabled when this is called, and must not
7490 * be enabled while using the returned buffer.
7491 * This does the copying from user space into a per CPU buffer.
7492 *
7493 * The @size must not be greater than the size passed in to
7494 * trace_user_fault_init().
7495 *
7496 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
7497 * otherwise it will call @copy_func. It will call @copy_func with:
7498 *
7499 * buffer: the per CPU buffer of the @tinfo.
7500 * ptr: The pointer @ptr to user space to read
7501 * size: The @size of the ptr to read
7502 * data: The @data parameter
7503 *
7504 * It is expected that @copy_func will return 0 on success and non-zero
7505 * if there was a fault.
7506 *
7507 * Returns a pointer to the buffer with the content read from @ptr.
7508 * Preemption must remain disabled while the caller accesses the
7509 * buffer returned by this function.
7510 * Returns NULL if there was a fault, or the size passed in is
7511 * greater than the size passed to trace_user_fault_init().
7512 */
7513 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
7514 const char __user *ptr, size_t size,
7515 trace_user_buf_copy copy_func, void *data)
7516 {
7517 int cpu = smp_processor_id();
7518 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
7519 unsigned int cnt;
7520 int trys = 0;
7521 int ret;
7522
7523 lockdep_assert_preemption_disabled();
7524
7525 /*
7526 * It's up to the caller to not try to copy more than it said
7527 * it would.
7528 */
7529 if (size > tinfo->size)
7530 return NULL;
7531
7532 /*
7533 * This acts like a seqcount. The CPU's context switch count is
7534 * recorded, migration is disabled and preemption is enabled. The
7535 * user space memory is then copied into the per CPU buffer.
7536 * Preemption is disabled again, and if the per CPU context switch count
7537 * is still the same, it means the buffer has not been corrupted.
7538 * If the count is different, it is assumed the buffer is corrupted
7539 * and reading must be tried again.
7540 */
7541
7542 do {
7543 /*
7544 * If for some reason, copy_from_user() always causes a context
7545 * switch, this would then cause an infinite loop.
7546 * If this task is preempted by another user space task, it
7547 * will cause this task to try again. But just in case something
7548 * changes where the copying from user space causes another task
7549 * to run, prevent this from going into an infinite loop.
7550 * 100 tries should be plenty.
7551 */
7552 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
7553 return NULL;
7554
7555 /* Read the current CPU context switch counter */
7556 cnt = nr_context_switches_cpu(cpu);
7557
7558 /*
7559 * Preemption is going to be enabled, but this task must
7560 * remain on this CPU.
7561 */
7562 migrate_disable();
7563
7564 /*
7565 * Now preemption is being enabled and another task can come in
7566 * and use the same buffer and corrupt our data.
7567 */
7568 preempt_enable_notrace();
7569
7570 /* Make sure preemption is enabled here */
7571 lockdep_assert_preemption_enabled();
7572
7573 if (copy_func) {
7574 ret = copy_func(buffer, ptr, size, data);
7575 } else {
7576 ret = __copy_from_user(buffer, ptr, size);
7577 }
7578
7579 preempt_disable_notrace();
7580 migrate_enable();
7581
7582 /* if it faulted, no need to test if the buffer was corrupted */
7583 if (ret)
7584 return NULL;
7585
7586 /*
7587 * Preemption is disabled again, now check the per CPU context
7588 * switch counter. If it doesn't match, then another user space
7589 * process may have scheduled in and corrupted our buffer. In that
7590 * case the copying must be retried.
7591 */
7592 } while (nr_context_switches_cpu(cpu) != cnt);
7593
7594 return buffer;
7595 }
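/*
 * Illustrative sketch of a @copy_func, not part of the original file and
 * with a hypothetical name; the prototype is assumed to match the call
 * made above (buffer, user pointer, size, opaque data), returning 0 on
 * success and non-zero on fault:
 *
 *	static int example_copy(char *buffer, const char __user *ptr,
 *				size_t size, void *data)
 *	{
 *		return copy_from_user(buffer, ptr, size) ? -EFAULT : 0;
 *	}
 */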
7596
7597 static ssize_t
7598 tracing_mark_write(struct file *filp, const char __user *ubuf,
7599 size_t cnt, loff_t *fpos)
7600 {
7601 struct trace_array *tr = filp->private_data;
7602 ssize_t written = -ENODEV;
7603 unsigned long ip;
7604 char *buf;
7605
7606 if (tracing_disabled)
7607 return -EINVAL;
7608
7609 if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7610 return -EINVAL;
7611
7612 if ((ssize_t)cnt < 0)
7613 return -EINVAL;
7614
7615 if (cnt > TRACE_MARKER_MAX_SIZE)
7616 cnt = TRACE_MARKER_MAX_SIZE;
7617
7618 /* Must have preemption disabled while having access to the buffer */
7619 guard(preempt_notrace)();
7620
7621 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7622 if (!buf)
7623 return -EFAULT;
7624
7625 /* The selftests expect this function to be the IP address */
7626 ip = _THIS_IP_;
7627
7628 /* The global trace_marker can go to multiple instances */
7629 if (tr == &global_trace) {
7630 guard(rcu)();
7631 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7632 written = write_marker_to_buffer(tr, buf, cnt, ip);
7633 if (written < 0)
7634 break;
7635 }
7636 } else {
7637 written = write_marker_to_buffer(tr, buf, cnt, ip);
7638 }
7639
7640 return written;
7641 }
7642
7643 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7644 const char *buf, size_t cnt)
7645 {
7646 struct ring_buffer_event *event;
7647 struct trace_buffer *buffer;
7648 struct raw_data_entry *entry;
7649 ssize_t written;
7650 size_t size;
7651
7652 /* cnt includes both the entry->id and the data behind it. */
7653 size = struct_offset(entry, id) + cnt;
7654
7655 buffer = tr->array_buffer.buffer;
7656
7657 if (size > ring_buffer_max_event_size(buffer))
7658 return -EINVAL;
7659
7660 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7661 tracing_gen_ctx());
7662 if (!event)
7663 /* Ring buffer disabled, return as if not open for write */
7664 return -EBADF;
7665
7666 entry = ring_buffer_event_data(event);
7667 unsafe_memcpy(&entry->id, buf, cnt,
7668 "id and content already reserved on ring buffer"
7669 "'buf' includes the 'id' and the data."
7670 "'entry' was allocated with cnt from 'id'.");
7671 written = cnt;
7672
7673 __buffer_unlock_commit(buffer, event);
7674
7675 return written;
7676 }
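/*
 * Illustrative sketch, not part of the original file: the buffer handed
 * to write_raw_marker_to_buffer() starts with the unsigned int id and is
 * followed by the raw payload, so a writer could lay it out as:
 *
 *	struct {
 *		unsigned int id;	// event tag copied into entry->id
 *		char payload[8];	// opaque data behind the id
 *	} example_raw = { .id = 1, .payload = "rawdata" };
 *
 * The struct layout and sizes above are made up for illustration.
 */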
7677
7678 static ssize_t
7679 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7680 size_t cnt, loff_t *fpos)
7681 {
7682 struct trace_array *tr = filp->private_data;
7683 ssize_t written = -ENODEV;
7684 char *buf;
7685
7686 if (tracing_disabled)
7687 return -EINVAL;
7688
7689 if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
7690 return -EINVAL;
7691
7692 /* The marker must at least have a tag id */
7693 if (cnt < sizeof(unsigned int))
7694 return -EINVAL;
7695
7696 /* raw write is all or nothing */
7697 if (cnt > TRACE_MARKER_MAX_SIZE)
7698 return -EINVAL;
7699
7700 /* Must have preemption disabled while having access to the buffer */
7701 guard(preempt_notrace)();
7702
7703 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
7704 if (!buf)
7705 return -EFAULT;
7706
7707 /* The global trace_marker_raw can go to multiple instances */
7708 if (tr == &global_trace) {
7709 guard(rcu)();
7710 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7711 written = write_raw_marker_to_buffer(tr, buf, cnt);
7712 if (written < 0)
7713 break;
7714 }
7715 } else {
7716 written = write_raw_marker_to_buffer(tr, buf, cnt);
7717 }
7718
7719 return written;
7720 }
7721
7722 static int tracing_mark_open(struct inode *inode, struct file *filp)
7723 {
7724 int ret;
7725
7726 scoped_guard(mutex, &trace_user_buffer_mutex) {
7727 if (!trace_user_buffer) {
7728 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
7729 if (ret < 0)
7730 return ret;
7731 } else {
7732 trace_user_buffer->ref++;
7733 }
7734 }
7735
7736 stream_open(inode, filp);
7737 ret = tracing_open_generic_tr(inode, filp);
7738 if (ret < 0)
7739 user_buffer_put(&trace_user_buffer);
7740 return ret;
7741 }
7742
7743 static int tracing_mark_release(struct inode *inode, struct file *file)
7744 {
7745 user_buffer_put(&trace_user_buffer);
7746 return tracing_release_generic_tr(inode, file);
7747 }
7748
7749 static int tracing_clock_show(struct seq_file *m, void *v)
7750 {
7751 struct trace_array *tr = m->private;
7752 int i;
7753
7754 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7755 seq_printf(m,
7756 "%s%s%s%s", i ? " " : "",
7757 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7758 i == tr->clock_id ? "]" : "");
7759 seq_putc(m, '\n');
7760
7761 return 0;
7762 }
7763
7764 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7765 {
7766 int i;
7767
7768 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7769 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7770 break;
7771 }
7772 if (i == ARRAY_SIZE(trace_clocks))
7773 return -EINVAL;
7774
7775 guard(mutex)(&trace_types_lock);
7776
7777 tr->clock_id = i;
7778
7779 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7780
7781 /*
7782 * New clock may not be consistent with the previous clock.
7783 * Reset the buffer so that it doesn't have incomparable timestamps.
7784 */
7785 tracing_reset_online_cpus(&tr->array_buffer);
7786
7787 #ifdef CONFIG_TRACER_MAX_TRACE
7788 if (tr->max_buffer.buffer)
7789 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7790 tracing_reset_online_cpus(&tr->max_buffer);
7791 #endif
7792
7793 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7794 struct trace_scratch *tscratch = tr->scratch;
7795
7796 tscratch->clock_id = i;
7797 }
7798
7799 return 0;
7800 }
7801
7802 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7803 size_t cnt, loff_t *fpos)
7804 {
7805 struct seq_file *m = filp->private_data;
7806 struct trace_array *tr = m->private;
7807 char buf[64];
7808 const char *clockstr;
7809 int ret;
7810
7811 if (cnt >= sizeof(buf))
7812 return -EINVAL;
7813
7814 if (copy_from_user(buf, ubuf, cnt))
7815 return -EFAULT;
7816
7817 buf[cnt] = 0;
7818
7819 clockstr = strstrip(buf);
7820
7821 ret = tracing_set_clock(tr, clockstr);
7822 if (ret)
7823 return ret;
7824
7825 *fpos += cnt;
7826
7827 return cnt;
7828 }
7829
7830 static int tracing_clock_open(struct inode *inode, struct file *file)
7831 {
7832 struct trace_array *tr = inode->i_private;
7833 int ret;
7834
7835 ret = tracing_check_open_get_tr(tr);
7836 if (ret)
7837 return ret;
7838
7839 ret = single_open(file, tracing_clock_show, inode->i_private);
7840 if (ret < 0)
7841 trace_array_put(tr);
7842
7843 return ret;
7844 }
7845
7846 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7847 {
7848 struct trace_array *tr = m->private;
7849
7850 guard(mutex)(&trace_types_lock);
7851
7852 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7853 seq_puts(m, "delta [absolute]\n");
7854 else
7855 seq_puts(m, "[delta] absolute\n");
7856
7857 return 0;
7858 }
7859
7860 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7861 {
7862 struct trace_array *tr = inode->i_private;
7863 int ret;
7864
7865 ret = tracing_check_open_get_tr(tr);
7866 if (ret)
7867 return ret;
7868
7869 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7870 if (ret < 0)
7871 trace_array_put(tr);
7872
7873 return ret;
7874 }
7875
7876 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7877 {
7878 if (rbe == this_cpu_read(trace_buffered_event))
7879 return ring_buffer_time_stamp(buffer);
7880
7881 return ring_buffer_event_time_stamp(buffer, rbe);
7882 }
7883
7884 /*
7885 * Set or disable using the per CPU trace_buffered_event when possible.
7886 */
7887 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7888 {
7889 guard(mutex)(&trace_types_lock);
7890
7891 if (set && tr->no_filter_buffering_ref++)
7892 return 0;
7893
7894 if (!set) {
7895 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7896 return -EINVAL;
7897
7898 --tr->no_filter_buffering_ref;
7899 }
7900
7901 return 0;
7902 }
7903
7904 struct ftrace_buffer_info {
7905 struct trace_iterator iter;
7906 void *spare;
7907 unsigned int spare_cpu;
7908 unsigned int spare_size;
7909 unsigned int read;
7910 };
7911
7912 #ifdef CONFIG_TRACER_SNAPSHOT
7913 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7914 {
7915 struct trace_array *tr = inode->i_private;
7916 struct trace_iterator *iter;
7917 struct seq_file *m;
7918 int ret;
7919
7920 ret = tracing_check_open_get_tr(tr);
7921 if (ret)
7922 return ret;
7923
7924 if (file->f_mode & FMODE_READ) {
7925 iter = __tracing_open(inode, file, true);
7926 if (IS_ERR(iter))
7927 ret = PTR_ERR(iter);
7928 } else {
7929 /* Writes still need the seq_file to hold the private data */
7930 ret = -ENOMEM;
7931 m = kzalloc(sizeof(*m), GFP_KERNEL);
7932 if (!m)
7933 goto out;
7934 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7935 if (!iter) {
7936 kfree(m);
7937 goto out;
7938 }
7939 ret = 0;
7940
7941 iter->tr = tr;
7942 iter->array_buffer = &tr->max_buffer;
7943 iter->cpu_file = tracing_get_cpu(inode);
7944 m->private = iter;
7945 file->private_data = m;
7946 }
7947 out:
7948 if (ret < 0)
7949 trace_array_put(tr);
7950
7951 return ret;
7952 }
7953
7954 static void tracing_swap_cpu_buffer(void *tr)
7955 {
7956 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7957 }
7958
7959 static ssize_t
7960 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7961 loff_t *ppos)
7962 {
7963 struct seq_file *m = filp->private_data;
7964 struct trace_iterator *iter = m->private;
7965 struct trace_array *tr = iter->tr;
7966 unsigned long val;
7967 int ret;
7968
7969 ret = tracing_update_buffers(tr);
7970 if (ret < 0)
7971 return ret;
7972
7973 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7974 if (ret)
7975 return ret;
7976
7977 guard(mutex)(&trace_types_lock);
7978
7979 if (tr->current_trace->use_max_tr)
7980 return -EBUSY;
7981
7982 local_irq_disable();
7983 arch_spin_lock(&tr->max_lock);
7984 if (tr->cond_snapshot)
7985 ret = -EBUSY;
7986 arch_spin_unlock(&tr->max_lock);
7987 local_irq_enable();
7988 if (ret)
7989 return ret;
7990
7991 switch (val) {
7992 case 0:
7993 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7994 return -EINVAL;
7995 if (tr->allocated_snapshot)
7996 free_snapshot(tr);
7997 break;
7998 case 1:
7999 /* Only allow per-cpu swap if the ring buffer supports it */
8000 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
8001 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
8002 return -EINVAL;
8003 #endif
8004 if (tr->allocated_snapshot)
8005 ret = resize_buffer_duplicate_size(&tr->max_buffer,
8006 &tr->array_buffer, iter->cpu_file);
8007
8008 ret = tracing_arm_snapshot_locked(tr);
8009 if (ret)
8010 return ret;
8011
8012 /* Now, we're going to swap */
8013 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
8014 local_irq_disable();
8015 update_max_tr(tr, current, smp_processor_id(), NULL);
8016 local_irq_enable();
8017 } else {
8018 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
8019 (void *)tr, 1);
8020 }
8021 tracing_disarm_snapshot(tr);
8022 break;
8023 default:
8024 if (tr->allocated_snapshot) {
8025 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
8026 tracing_reset_online_cpus(&tr->max_buffer);
8027 else
8028 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
8029 }
8030 break;
8031 }
8032
8033 if (ret >= 0) {
8034 *ppos += cnt;
8035 ret = cnt;
8036 }
8037
8038 return ret;
8039 }
8040
8041 static int tracing_snapshot_release(struct inode *inode, struct file *file)
8042 {
8043 struct seq_file *m = file->private_data;
8044 int ret;
8045
8046 ret = tracing_release(inode, file);
8047
8048 if (file->f_mode & FMODE_READ)
8049 return ret;
8050
8051 /* If write only, the seq_file is just a stub */
8052 if (m)
8053 kfree(m->private);
8054 kfree(m);
8055
8056 return 0;
8057 }
8058
8059 static int tracing_buffers_open(struct inode *inode, struct file *filp);
8060 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
8061 size_t count, loff_t *ppos);
8062 static int tracing_buffers_release(struct inode *inode, struct file *file);
8063 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8064 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
8065
8066 static int snapshot_raw_open(struct inode *inode, struct file *filp)
8067 {
8068 struct ftrace_buffer_info *info;
8069 int ret;
8070
8071 /* The following checks for tracefs lockdown */
8072 ret = tracing_buffers_open(inode, filp);
8073 if (ret < 0)
8074 return ret;
8075
8076 info = filp->private_data;
8077
8078 if (info->iter.trace->use_max_tr) {
8079 tracing_buffers_release(inode, filp);
8080 return -EBUSY;
8081 }
8082
8083 info->iter.snapshot = true;
8084 info->iter.array_buffer = &info->iter.tr->max_buffer;
8085
8086 return ret;
8087 }
8088
8089 #endif /* CONFIG_TRACER_SNAPSHOT */
8090
8091
8092 static const struct file_operations tracing_thresh_fops = {
8093 .open = tracing_open_generic,
8094 .read = tracing_thresh_read,
8095 .write = tracing_thresh_write,
8096 .llseek = generic_file_llseek,
8097 };
8098
8099 #ifdef CONFIG_TRACER_MAX_TRACE
8100 static const struct file_operations tracing_max_lat_fops = {
8101 .open = tracing_open_generic_tr,
8102 .read = tracing_max_lat_read,
8103 .write = tracing_max_lat_write,
8104 .llseek = generic_file_llseek,
8105 .release = tracing_release_generic_tr,
8106 };
8107 #endif
8108
8109 static const struct file_operations set_tracer_fops = {
8110 .open = tracing_open_generic_tr,
8111 .read = tracing_set_trace_read,
8112 .write = tracing_set_trace_write,
8113 .llseek = generic_file_llseek,
8114 .release = tracing_release_generic_tr,
8115 };
8116
8117 static const struct file_operations tracing_pipe_fops = {
8118 .open = tracing_open_pipe,
8119 .poll = tracing_poll_pipe,
8120 .read = tracing_read_pipe,
8121 .splice_read = tracing_splice_read_pipe,
8122 .release = tracing_release_pipe,
8123 };
8124
8125 static const struct file_operations tracing_entries_fops = {
8126 .open = tracing_open_generic_tr,
8127 .read = tracing_entries_read,
8128 .write = tracing_entries_write,
8129 .llseek = generic_file_llseek,
8130 .release = tracing_release_generic_tr,
8131 };
8132
8133 static const struct file_operations tracing_syscall_buf_fops = {
8134 .open = tracing_open_generic_tr,
8135 .read = tracing_syscall_buf_read,
8136 .write = tracing_syscall_buf_write,
8137 .llseek = generic_file_llseek,
8138 .release = tracing_release_generic_tr,
8139 };
8140
8141 static const struct file_operations tracing_buffer_meta_fops = {
8142 .open = tracing_buffer_meta_open,
8143 .read = seq_read,
8144 .llseek = seq_lseek,
8145 .release = tracing_seq_release,
8146 };
8147
8148 static const struct file_operations tracing_total_entries_fops = {
8149 .open = tracing_open_generic_tr,
8150 .read = tracing_total_entries_read,
8151 .llseek = generic_file_llseek,
8152 .release = tracing_release_generic_tr,
8153 };
8154
8155 static const struct file_operations tracing_free_buffer_fops = {
8156 .open = tracing_open_generic_tr,
8157 .write = tracing_free_buffer_write,
8158 .release = tracing_free_buffer_release,
8159 };
8160
8161 static const struct file_operations tracing_mark_fops = {
8162 .open = tracing_mark_open,
8163 .write = tracing_mark_write,
8164 .release = tracing_mark_release,
8165 };
8166
8167 static const struct file_operations tracing_mark_raw_fops = {
8168 .open = tracing_mark_open,
8169 .write = tracing_mark_raw_write,
8170 .release = tracing_mark_release,
8171 };
8172
8173 static const struct file_operations trace_clock_fops = {
8174 .open = tracing_clock_open,
8175 .read = seq_read,
8176 .llseek = seq_lseek,
8177 .release = tracing_single_release_tr,
8178 .write = tracing_clock_write,
8179 };
8180
8181 static const struct file_operations trace_time_stamp_mode_fops = {
8182 .open = tracing_time_stamp_mode_open,
8183 .read = seq_read,
8184 .llseek = seq_lseek,
8185 .release = tracing_single_release_tr,
8186 };
8187
8188 static const struct file_operations last_boot_fops = {
8189 .open = tracing_last_boot_open,
8190 .read = seq_read,
8191 .llseek = seq_lseek,
8192 .release = tracing_seq_release,
8193 };
8194
8195 #ifdef CONFIG_TRACER_SNAPSHOT
8196 static const struct file_operations snapshot_fops = {
8197 .open = tracing_snapshot_open,
8198 .read = seq_read,
8199 .write = tracing_snapshot_write,
8200 .llseek = tracing_lseek,
8201 .release = tracing_snapshot_release,
8202 };
8203
8204 static const struct file_operations snapshot_raw_fops = {
8205 .open = snapshot_raw_open,
8206 .read = tracing_buffers_read,
8207 .release = tracing_buffers_release,
8208 .splice_read = tracing_buffers_splice_read,
8209 };
8210
8211 #endif /* CONFIG_TRACER_SNAPSHOT */
8212
8213 /*
8214 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
8215 * @filp: The active open file structure
8216 * @ubuf: The userspace provided buffer holding the value to write
8217 * @cnt: The maximum number of bytes to read
8218 * @ppos: The current "file" position
8219 *
8220 * This function implements the write interface for a struct trace_min_max_param.
8221 * The filp->private_data must point to a trace_min_max_param structure that
8222 * defines where to write the value, the min and the max acceptable values,
8223 * and a lock to protect the write.
8224 */
8225 static ssize_t
8226 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
8227 {
8228 struct trace_min_max_param *param = filp->private_data;
8229 u64 val;
8230 int err;
8231
8232 if (!param)
8233 return -EFAULT;
8234
8235 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
8236 if (err)
8237 return err;
8238
8239 if (param->lock)
8240 mutex_lock(param->lock);
8241
8242 if (param->min && val < *param->min)
8243 err = -EINVAL;
8244
8245 if (param->max && val > *param->max)
8246 err = -EINVAL;
8247
8248 if (!err)
8249 *param->val = val;
8250
8251 if (param->lock)
8252 mutex_unlock(param->lock);
8253
8254 if (err)
8255 return err;
8256
8257 return cnt;
8258 }
8259
8260 /*
8261 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
8262 * @filp: The active open file structure
8263 * @ubuf: The userspace provided buffer to read value into
8264 * @cnt: The maximum number of bytes to read
8265 * @ppos: The current "file" position
8266 *
8267 * This function implements the read interface for a struct trace_min_max_param.
8268 * The filp->private_data must point to a trace_min_max_param struct with valid
8269 * data.
8270 */
8271 static ssize_t
8272 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8273 {
8274 struct trace_min_max_param *param = filp->private_data;
8275 char buf[U64_STR_SIZE];
8276 int len;
8277 u64 val;
8278
8279 if (!param)
8280 return -EFAULT;
8281
8282 val = *param->val;
8283
8284 if (cnt > sizeof(buf))
8285 cnt = sizeof(buf);
8286
8287 len = snprintf(buf, sizeof(buf), "%llu\n", val);
8288
8289 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
8290 }
8291
8292 const struct file_operations trace_min_max_fops = {
8293 .open = tracing_open_generic,
8294 .read = trace_min_max_read,
8295 .write = trace_min_max_write,
8296 };
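/*
 * Illustrative sketch, not part of the original file; all "example_"
 * names are hypothetical. A user of trace_min_max_fops points a
 * trace_min_max_param at the value and its optional bounds and lock,
 * then hands that struct to the file as its private data:
 *
 *	static DEFINE_MUTEX(example_mutex);
 *	static u64 example_val;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_mutex,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 */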
8297
8298 #define TRACING_LOG_ERRS_MAX 8
8299 #define TRACING_LOG_LOC_MAX 128
8300
8301 #define CMD_PREFIX " Command: "
8302
8303 struct err_info {
8304 const char **errs; /* ptr to loc-specific array of err strings */
8305 u8 type; /* index into errs -> specific err string */
8306 u16 pos; /* caret position */
8307 u64 ts;
8308 };
8309
8310 struct tracing_log_err {
8311 struct list_head list;
8312 struct err_info info;
8313 char loc[TRACING_LOG_LOC_MAX]; /* err location */
8314 char *cmd; /* what caused err */
8315 };
8316
8317 static DEFINE_MUTEX(tracing_err_log_lock);
8318
8319 static struct tracing_log_err *alloc_tracing_log_err(int len)
8320 {
8321 struct tracing_log_err *err;
8322
8323 err = kzalloc(sizeof(*err), GFP_KERNEL);
8324 if (!err)
8325 return ERR_PTR(-ENOMEM);
8326
8327 err->cmd = kzalloc(len, GFP_KERNEL);
8328 if (!err->cmd) {
8329 kfree(err);
8330 return ERR_PTR(-ENOMEM);
8331 }
8332
8333 return err;
8334 }
8335
8336 static void free_tracing_log_err(struct tracing_log_err *err)
8337 {
8338 kfree(err->cmd);
8339 kfree(err);
8340 }
8341
8342 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8343 int len)
8344 {
8345 struct tracing_log_err *err;
8346 char *cmd;
8347
8348 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8349 err = alloc_tracing_log_err(len);
8350 if (PTR_ERR(err) != -ENOMEM)
8351 tr->n_err_log_entries++;
8352
8353 return err;
8354 }
8355 cmd = kzalloc(len, GFP_KERNEL);
8356 if (!cmd)
8357 return ERR_PTR(-ENOMEM);
8358 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8359 kfree(err->cmd);
8360 err->cmd = cmd;
8361 list_del(&err->list);
8362
8363 return err;
8364 }
8365
8366 /**
8367 * err_pos - find the position of a string within a command for error careting
8368 * @cmd: The tracing command that caused the error
8369 * @str: The string to position the caret at within @cmd
8370 *
8371 * Finds the position of the first occurrence of @str within @cmd. The
8372 * return value can be passed to tracing_log_err() for caret placement
8373 * within @cmd.
8374 *
8375 * Returns the index within @cmd of the first occurrence of @str or 0
8376 * if @str was not found.
8377 */
8378 unsigned int err_pos(char *cmd, const char *str)
8379 {
8380 char *found;
8381
8382 if (WARN_ON(!strlen(cmd)))
8383 return 0;
8384
8385 found = strstr(cmd, str);
8386 if (found)
8387 return found - cmd;
8388
8389 return 0;
8390 }
8391
8392 /**
8393 * tracing_log_err - write an error to the tracing error log
8394 * @tr: The associated trace array for the error (NULL for top level array)
8395 * @loc: A string describing where the error occurred
8396 * @cmd: The tracing command that caused the error
8397 * @errs: The array of loc-specific static error strings
8398 * @type: The index into errs[], which produces the specific static err string
8399 * @pos: The position the caret should be placed in the cmd
8400 *
8401 * Writes an error into tracing/error_log of the form:
8402 *
8403 * <loc>: error: <text>
8404 * Command: <cmd>
8405 * ^
8406 *
8407 * tracing/error_log is a small log file containing the last
8408 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8409 * unless there has been a tracing error, and the error log can be
8410 * cleared and have its memory freed by writing the empty string in
8411 * truncation mode to it i.e. echo > tracing/error_log.
8412 *
8413 * NOTE: the @errs array along with the @type param are used to
8414 * produce a static error string - this string is not copied and saved
8415 * when the error is logged - only a pointer to it is saved. See
8416 * existing callers for examples of how static strings are typically
8417 * defined for use with tracing_log_err().
8418 */
8419 void tracing_log_err(struct trace_array *tr,
8420 const char *loc, const char *cmd,
8421 const char **errs, u8 type, u16 pos)
8422 {
8423 struct tracing_log_err *err;
8424 int len = 0;
8425
8426 if (!tr)
8427 tr = &global_trace;
8428
8429 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8430
8431 guard(mutex)(&tracing_err_log_lock);
8432
8433 err = get_tracing_log_err(tr, len);
8434 if (PTR_ERR(err) == -ENOMEM)
8435 return;
8436
8437 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8438 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8439
8440 err->info.errs = errs;
8441 err->info.type = type;
8442 err->info.pos = pos;
8443 err->info.ts = local_clock();
8444
8445 list_add_tail(&err->list, &tr->err_log);
8446 }
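/*
 * Illustrative sketch of a caller, not part of the original file; the
 * location string, error table and command below are hypothetical.
 * err_pos() places the caret under the offending token of @cmd:
 *
 *	static const char *example_errs[] = { "Invalid argument", };
 *
 *	tracing_log_err(tr, "example: subsystem", cmd, example_errs,
 *			0, err_pos(cmd, "bad_token"));
 */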
8447
8448 static void clear_tracing_err_log(struct trace_array *tr)
8449 {
8450 struct tracing_log_err *err, *next;
8451
8452 guard(mutex)(&tracing_err_log_lock);
8453
8454 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8455 list_del(&err->list);
8456 free_tracing_log_err(err);
8457 }
8458
8459 tr->n_err_log_entries = 0;
8460 }
8461
8462 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8463 {
8464 struct trace_array *tr = m->private;
8465
8466 mutex_lock(&tracing_err_log_lock);
8467
8468 return seq_list_start(&tr->err_log, *pos);
8469 }
8470
8471 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8472 {
8473 struct trace_array *tr = m->private;
8474
8475 return seq_list_next(v, &tr->err_log, pos);
8476 }
8477
8478 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8479 {
8480 mutex_unlock(&tracing_err_log_lock);
8481 }
8482
8483 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8484 {
8485 u16 i;
8486
8487 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8488 seq_putc(m, ' ');
8489 for (i = 0; i < pos; i++)
8490 seq_putc(m, ' ');
8491 seq_puts(m, "^\n");
8492 }
8493
8494 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8495 {
8496 struct tracing_log_err *err = v;
8497
8498 if (err) {
8499 const char *err_text = err->info.errs[err->info.type];
8500 u64 sec = err->info.ts;
8501 u32 nsec;
8502
8503 nsec = do_div(sec, NSEC_PER_SEC);
8504 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8505 err->loc, err_text);
8506 seq_printf(m, "%s", err->cmd);
8507 tracing_err_log_show_pos(m, err->info.pos);
8508 }
8509
8510 return 0;
8511 }
8512
8513 static const struct seq_operations tracing_err_log_seq_ops = {
8514 .start = tracing_err_log_seq_start,
8515 .next = tracing_err_log_seq_next,
8516 .stop = tracing_err_log_seq_stop,
8517 .show = tracing_err_log_seq_show
8518 };
8519
8520 static int tracing_err_log_open(struct inode *inode, struct file *file)
8521 {
8522 struct trace_array *tr = inode->i_private;
8523 int ret = 0;
8524
8525 ret = tracing_check_open_get_tr(tr);
8526 if (ret)
8527 return ret;
8528
8529 /* If this file was opened for write, then erase contents */
8530 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8531 clear_tracing_err_log(tr);
8532
8533 if (file->f_mode & FMODE_READ) {
8534 ret = seq_open(file, &tracing_err_log_seq_ops);
8535 if (!ret) {
8536 struct seq_file *m = file->private_data;
8537 m->private = tr;
8538 } else {
8539 trace_array_put(tr);
8540 }
8541 }
8542 return ret;
8543 }
8544
8545 static ssize_t tracing_err_log_write(struct file *file,
8546 const char __user *buffer,
8547 size_t count, loff_t *ppos)
8548 {
8549 return count;
8550 }
8551
8552 static int tracing_err_log_release(struct inode *inode, struct file *file)
8553 {
8554 struct trace_array *tr = inode->i_private;
8555
8556 trace_array_put(tr);
8557
8558 if (file->f_mode & FMODE_READ)
8559 seq_release(inode, file);
8560
8561 return 0;
8562 }
8563
8564 static const struct file_operations tracing_err_log_fops = {
8565 .open = tracing_err_log_open,
8566 .write = tracing_err_log_write,
8567 .read = seq_read,
8568 .llseek = tracing_lseek,
8569 .release = tracing_err_log_release,
8570 };
8571
8572 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8573 {
8574 struct trace_array *tr = inode->i_private;
8575 struct ftrace_buffer_info *info;
8576 int ret;
8577
8578 ret = tracing_check_open_get_tr(tr);
8579 if (ret)
8580 return ret;
8581
8582 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8583 if (!info) {
8584 trace_array_put(tr);
8585 return -ENOMEM;
8586 }
8587
8588 mutex_lock(&trace_types_lock);
8589
8590 info->iter.tr = tr;
8591 info->iter.cpu_file = tracing_get_cpu(inode);
8592 info->iter.trace = tr->current_trace;
8593 info->iter.array_buffer = &tr->array_buffer;
8594 info->spare = NULL;
8595 /* Force reading ring buffer for first read */
8596 info->read = (unsigned int)-1;
8597
8598 filp->private_data = info;
8599
8600 tr->trace_ref++;
8601
8602 mutex_unlock(&trace_types_lock);
8603
8604 ret = nonseekable_open(inode, filp);
8605 if (ret < 0)
8606 trace_array_put(tr);
8607
8608 return ret;
8609 }
8610
8611 static __poll_t
8612 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8613 {
8614 struct ftrace_buffer_info *info = filp->private_data;
8615 struct trace_iterator *iter = &info->iter;
8616
8617 return trace_poll(iter, filp, poll_table);
8618 }
8619
8620 static ssize_t
8621 tracing_buffers_read(struct file *filp, char __user *ubuf,
8622 size_t count, loff_t *ppos)
8623 {
8624 struct ftrace_buffer_info *info = filp->private_data;
8625 struct trace_iterator *iter = &info->iter;
8626 void *trace_data;
8627 int page_size;
8628 ssize_t ret = 0;
8629 ssize_t size;
8630
8631 if (!count)
8632 return 0;
8633
8634 #ifdef CONFIG_TRACER_MAX_TRACE
8635 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8636 return -EBUSY;
8637 #endif
8638
8639 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8640
8641 /* Make sure the spare matches the current sub buffer size */
8642 if (info->spare) {
8643 if (page_size != info->spare_size) {
8644 ring_buffer_free_read_page(iter->array_buffer->buffer,
8645 info->spare_cpu, info->spare);
8646 info->spare = NULL;
8647 }
8648 }
8649
8650 if (!info->spare) {
8651 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8652 iter->cpu_file);
8653 if (IS_ERR(info->spare)) {
8654 ret = PTR_ERR(info->spare);
8655 info->spare = NULL;
8656 } else {
8657 info->spare_cpu = iter->cpu_file;
8658 info->spare_size = page_size;
8659 }
8660 }
8661 if (!info->spare)
8662 return ret;
8663
8664 /* Do we have previous read data to read? */
8665 if (info->read < page_size)
8666 goto read;
8667
8668 again:
8669 trace_access_lock(iter->cpu_file);
8670 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8671 info->spare,
8672 count,
8673 iter->cpu_file, 0);
8674 trace_access_unlock(iter->cpu_file);
8675
8676 if (ret < 0) {
8677 if (trace_empty(iter) && !iter->closed) {
8678 if (update_last_data_if_empty(iter->tr))
8679 return 0;
8680
8681 if ((filp->f_flags & O_NONBLOCK))
8682 return -EAGAIN;
8683
8684 ret = wait_on_pipe(iter, 0);
8685 if (ret)
8686 return ret;
8687
8688 goto again;
8689 }
8690 return 0;
8691 }
8692
8693 info->read = 0;
8694 read:
8695 size = page_size - info->read;
8696 if (size > count)
8697 size = count;
8698 trace_data = ring_buffer_read_page_data(info->spare);
8699 ret = copy_to_user(ubuf, trace_data + info->read, size);
8700 if (ret == size)
8701 return -EFAULT;
8702
8703 size -= ret;
8704
8705 *ppos += size;
8706 info->read += size;
8707
8708 return size;
8709 }
8710
8711 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8712 {
8713 struct ftrace_buffer_info *info = file->private_data;
8714 struct trace_iterator *iter = &info->iter;
8715
8716 iter->closed = true;
8717 /* Make sure the waiters see the new wait_index */
8718 (void)atomic_fetch_inc_release(&iter->wait_index);
8719
8720 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8721
8722 return 0;
8723 }
8724
8725 static int tracing_buffers_release(struct inode *inode, struct file *file)
8726 {
8727 struct ftrace_buffer_info *info = file->private_data;
8728 struct trace_iterator *iter = &info->iter;
8729
8730 guard(mutex)(&trace_types_lock);
8731
8732 iter->tr->trace_ref--;
8733
8734 __trace_array_put(iter->tr);
8735
8736 if (info->spare)
8737 ring_buffer_free_read_page(iter->array_buffer->buffer,
8738 info->spare_cpu, info->spare);
8739 kvfree(info);
8740
8741 return 0;
8742 }
8743
8744 struct buffer_ref {
8745 struct trace_buffer *buffer;
8746 void *page;
8747 int cpu;
8748 refcount_t refcount;
8749 };
8750
8751 static void buffer_ref_release(struct buffer_ref *ref)
8752 {
8753 if (!refcount_dec_and_test(&ref->refcount))
8754 return;
8755 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8756 kfree(ref);
8757 }
8758
8759 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8760 struct pipe_buffer *buf)
8761 {
8762 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8763
8764 buffer_ref_release(ref);
8765 buf->private = 0;
8766 }
8767
8768 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8769 struct pipe_buffer *buf)
8770 {
8771 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8772
8773 if (refcount_read(&ref->refcount) > INT_MAX/2)
8774 return false;
8775
8776 refcount_inc(&ref->refcount);
8777 return true;
8778 }
8779
8780 /* Pipe buffer operations for a buffer. */
8781 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8782 .release = buffer_pipe_buf_release,
8783 .get = buffer_pipe_buf_get,
8784 };
8785
8786 /*
8787 * Callback from splice_to_pipe(), if we need to release some pages
8788 * at the end of the spd in case we error'ed out in filling the pipe.
8789 */
8790 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8791 {
8792 struct buffer_ref *ref =
8793 (struct buffer_ref *)spd->partial[i].private;
8794
8795 buffer_ref_release(ref);
8796 spd->partial[i].private = 0;
8797 }
8798
8799 static ssize_t
8800 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8801 struct pipe_inode_info *pipe, size_t len,
8802 unsigned int flags)
8803 {
8804 struct ftrace_buffer_info *info = file->private_data;
8805 struct trace_iterator *iter = &info->iter;
8806 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8807 struct page *pages_def[PIPE_DEF_BUFFERS];
8808 struct splice_pipe_desc spd = {
8809 .pages = pages_def,
8810 .partial = partial_def,
8811 .nr_pages_max = PIPE_DEF_BUFFERS,
8812 .ops = &buffer_pipe_buf_ops,
8813 .spd_release = buffer_spd_release,
8814 };
8815 struct buffer_ref *ref;
8816 bool woken = false;
8817 int page_size;
8818 int entries, i;
8819 ssize_t ret = 0;
8820
8821 #ifdef CONFIG_TRACER_MAX_TRACE
8822 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8823 return -EBUSY;
8824 #endif
8825
8826 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8827 if (*ppos & (page_size - 1))
8828 return -EINVAL;
8829
8830 if (len & (page_size - 1)) {
8831 if (len < page_size)
8832 return -EINVAL;
8833 len &= (~(page_size - 1));
8834 }
8835
8836 if (splice_grow_spd(pipe, &spd))
8837 return -ENOMEM;
8838
8839 again:
8840 trace_access_lock(iter->cpu_file);
8841 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8842
8843 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8844 struct page *page;
8845 int r;
8846
8847 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8848 if (!ref) {
8849 ret = -ENOMEM;
8850 break;
8851 }
8852
8853 refcount_set(&ref->refcount, 1);
8854 ref->buffer = iter->array_buffer->buffer;
8855 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8856 if (IS_ERR(ref->page)) {
8857 ret = PTR_ERR(ref->page);
8858 ref->page = NULL;
8859 kfree(ref);
8860 break;
8861 }
8862 ref->cpu = iter->cpu_file;
8863
8864 r = ring_buffer_read_page(ref->buffer, ref->page,
8865 len, iter->cpu_file, 1);
8866 if (r < 0) {
8867 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8868 ref->page);
8869 kfree(ref);
8870 break;
8871 }
8872
8873 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8874
8875 spd.pages[i] = page;
8876 spd.partial[i].len = page_size;
8877 spd.partial[i].offset = 0;
8878 spd.partial[i].private = (unsigned long)ref;
8879 spd.nr_pages++;
8880 *ppos += page_size;
8881
8882 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8883 }
8884
8885 trace_access_unlock(iter->cpu_file);
8886 spd.nr_pages = i;
8887
8888 /* did we read anything? */
8889 if (!spd.nr_pages) {
8890
8891 if (ret)
8892 goto out;
8893
8894 if (woken)
8895 goto out;
8896
8897 ret = -EAGAIN;
8898 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8899 goto out;
8900
8901 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8902 if (ret)
8903 goto out;
8904
8905 /* No need to wait after waking up when tracing is off */
8906 if (!tracer_tracing_is_on(iter->tr))
8907 goto out;
8908
8909 /* Iterate one more time to collect any new data then exit */
8910 woken = true;
8911
8912 goto again;
8913 }
8914
8915 ret = splice_to_pipe(pipe, &spd);
8916 out:
8917 splice_shrink_spd(&spd);
8918
8919 return ret;
8920 }
8921
8922 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8923 {
8924 struct ftrace_buffer_info *info = file->private_data;
8925 struct trace_iterator *iter = &info->iter;
8926 int err;
8927
8928 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8929 if (!(file->f_flags & O_NONBLOCK)) {
8930 err = ring_buffer_wait(iter->array_buffer->buffer,
8931 iter->cpu_file,
8932 iter->tr->buffer_percent,
8933 NULL, NULL);
8934 if (err)
8935 return err;
8936 }
8937
8938 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8939 iter->cpu_file);
8940 } else if (cmd) {
8941 return -ENOTTY;
8942 }
8943
8944 /*
8945 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8946 * waiters
8947 */
8948 guard(mutex)(&trace_types_lock);
8949
8950 /* Make sure the waiters see the new wait_index */
8951 (void)atomic_fetch_inc_release(&iter->wait_index);
8952
8953 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8954
8955 return 0;
8956 }
8957
8958 #ifdef CONFIG_TRACER_MAX_TRACE
8959 static int get_snapshot_map(struct trace_array *tr)
8960 {
8961 int err = 0;
8962
8963 /*
8964 * Called with mmap_lock held. lockdep would be unhappy if we would now
8965 * take trace_types_lock. Instead use the specific
8966 * snapshot_trigger_lock.
8967 */
8968 spin_lock(&tr->snapshot_trigger_lock);
8969
8970 if (tr->snapshot || tr->mapped == UINT_MAX)
8971 err = -EBUSY;
8972 else
8973 tr->mapped++;
8974
8975 spin_unlock(&tr->snapshot_trigger_lock);
8976
8977 /* Wait for update_max_tr() to observe iter->tr->mapped */
8978 if (tr->mapped == 1)
8979 synchronize_rcu();
8980
8981 return err;
8982
8983 }
8984 static void put_snapshot_map(struct trace_array *tr)
8985 {
8986 spin_lock(&tr->snapshot_trigger_lock);
8987 if (!WARN_ON(!tr->mapped))
8988 tr->mapped--;
8989 spin_unlock(&tr->snapshot_trigger_lock);
8990 }
8991 #else
8992 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8993 static inline void put_snapshot_map(struct trace_array *tr) { }
8994 #endif
8995
8996 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8997 {
8998 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8999 struct trace_iterator *iter = &info->iter;
9000
9001 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
9002 put_snapshot_map(iter->tr);
9003 }
9004
9005 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
9006 {
9007 /*
9008 * Trace buffer mappings require the complete buffer including
9009 * the meta page. Partial mappings are not supported.
9010 */
9011 return -EINVAL;
9012 }
9013
9014 static const struct vm_operations_struct tracing_buffers_vmops = {
9015 .close = tracing_buffers_mmap_close,
9016 .may_split = tracing_buffers_may_split,
9017 };
9018
9019 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
9020 {
9021 struct ftrace_buffer_info *info = filp->private_data;
9022 struct trace_iterator *iter = &info->iter;
9023 int ret = 0;
9024
9025 /* Memory-mapped and backup buffers are not supported for user space mmap */
9026 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
9027 return -ENODEV;
9028
9029 ret = get_snapshot_map(iter->tr);
9030 if (ret)
9031 return ret;
9032
9033 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
9034 if (ret)
9035 put_snapshot_map(iter->tr);
9036
9037 vma->vm_ops = &tracing_buffers_vmops;
9038
9039 return ret;
9040 }
9041
9042 static const struct file_operations tracing_buffers_fops = {
9043 .open = tracing_buffers_open,
9044 .read = tracing_buffers_read,
9045 .poll = tracing_buffers_poll,
9046 .release = tracing_buffers_release,
9047 .flush = tracing_buffers_flush,
9048 .splice_read = tracing_buffers_splice_read,
9049 .unlocked_ioctl = tracing_buffers_ioctl,
9050 .mmap = tracing_buffers_mmap,
9051 };
9052
9053 static ssize_t
9054 tracing_stats_read(struct file *filp, char __user *ubuf,
9055 size_t count, loff_t *ppos)
9056 {
9057 struct inode *inode = file_inode(filp);
9058 struct trace_array *tr = inode->i_private;
9059 struct array_buffer *trace_buf = &tr->array_buffer;
9060 int cpu = tracing_get_cpu(inode);
9061 struct trace_seq *s;
9062 unsigned long cnt;
9063 unsigned long long t;
9064 unsigned long usec_rem;
9065
9066 s = kmalloc(sizeof(*s), GFP_KERNEL);
9067 if (!s)
9068 return -ENOMEM;
9069
9070 trace_seq_init(s);
9071
9072 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
9073 trace_seq_printf(s, "entries: %ld\n", cnt);
9074
9075 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
9076 trace_seq_printf(s, "overrun: %ld\n", cnt);
9077
9078 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
9079 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
9080
9081 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
9082 trace_seq_printf(s, "bytes: %ld\n", cnt);
9083
9084 if (trace_clocks[tr->clock_id].in_ns) {
9085 /* local or global for trace_clock */
9086 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9087 usec_rem = do_div(t, USEC_PER_SEC);
9088 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
9089 t, usec_rem);
9090
9091 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
9092 usec_rem = do_div(t, USEC_PER_SEC);
9093 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
9094 } else {
9095 /* counter or tsc mode for trace_clock */
9096 trace_seq_printf(s, "oldest event ts: %llu\n",
9097 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
9098
9099 trace_seq_printf(s, "now ts: %llu\n",
9100 ring_buffer_time_stamp(trace_buf->buffer));
9101 }
9102
9103 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
9104 trace_seq_printf(s, "dropped events: %ld\n", cnt);
9105
9106 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
9107 trace_seq_printf(s, "read events: %ld\n", cnt);
9108
9109 count = simple_read_from_buffer(ubuf, count, ppos,
9110 s->buffer, trace_seq_used(s));
9111
9112 kfree(s);
9113
9114 return count;
9115 }
9116
9117 static const struct file_operations tracing_stats_fops = {
9118 .open = tracing_open_generic_tr,
9119 .read = tracing_stats_read,
9120 .llseek = generic_file_llseek,
9121 .release = tracing_release_generic_tr,
9122 };
9123
9124 #ifdef CONFIG_DYNAMIC_FTRACE
9125
9126 static ssize_t
9127 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
9128 size_t cnt, loff_t *ppos)
9129 {
9130 ssize_t ret;
9131 char *buf;
9132 int r;
9133
9134 /* 512 should be plenty to hold the amount needed */
9135 #define DYN_INFO_BUF_SIZE 512
9136
9137 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
9138 if (!buf)
9139 return -ENOMEM;
9140
9141 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
9142 "%ld pages:%ld groups: %ld\n"
9143 "ftrace boot update time = %llu (ns)\n"
9144 "ftrace module total update time = %llu (ns)\n",
9145 ftrace_update_tot_cnt,
9146 ftrace_number_of_pages,
9147 ftrace_number_of_groups,
9148 ftrace_update_time,
9149 ftrace_total_mod_time);
9150
9151 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9152 kfree(buf);
9153 return ret;
9154 }
9155
9156 static const struct file_operations tracing_dyn_info_fops = {
9157 .open = tracing_open_generic,
9158 .read = tracing_read_dyn_info,
9159 .llseek = generic_file_llseek,
9160 };
9161 #endif /* CONFIG_DYNAMIC_FTRACE */
9162
9163 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
9164 static void
9165 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
9166 struct trace_array *tr, struct ftrace_probe_ops *ops,
9167 void *data)
9168 {
9169 tracing_snapshot_instance(tr);
9170 }
9171
9172 static void
9173 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
9174 struct trace_array *tr, struct ftrace_probe_ops *ops,
9175 void *data)
9176 {
9177 struct ftrace_func_mapper *mapper = data;
9178 long *count = NULL;
9179
9180 if (mapper)
9181 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9182
9183 if (count) {
9184
9185 if (*count <= 0)
9186 return;
9187
9188 (*count)--;
9189 }
9190
9191 tracing_snapshot_instance(tr);
9192 }
9193
9194 static int
9195 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
9196 struct ftrace_probe_ops *ops, void *data)
9197 {
9198 struct ftrace_func_mapper *mapper = data;
9199 long *count = NULL;
9200
9201 seq_printf(m, "%ps:", (void *)ip);
9202
9203 seq_puts(m, "snapshot");
9204
9205 if (mapper)
9206 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
9207
9208 if (count)
9209 seq_printf(m, ":count=%ld\n", *count);
9210 else
9211 seq_puts(m, ":unlimited\n");
9212
9213 return 0;
9214 }
9215
9216 static int
9217 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
9218 unsigned long ip, void *init_data, void **data)
9219 {
9220 struct ftrace_func_mapper *mapper = *data;
9221
9222 if (!mapper) {
9223 mapper = allocate_ftrace_func_mapper();
9224 if (!mapper)
9225 return -ENOMEM;
9226 *data = mapper;
9227 }
9228
9229 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
9230 }
9231
9232 static void
9233 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
9234 unsigned long ip, void *data)
9235 {
9236 struct ftrace_func_mapper *mapper = data;
9237
9238 if (!ip) {
9239 if (!mapper)
9240 return;
9241 free_ftrace_func_mapper(mapper, NULL);
9242 return;
9243 }
9244
9245 ftrace_func_mapper_remove_ip(mapper, ip);
9246 }
9247
9248 static struct ftrace_probe_ops snapshot_probe_ops = {
9249 .func = ftrace_snapshot,
9250 .print = ftrace_snapshot_print,
9251 };
9252
9253 static struct ftrace_probe_ops snapshot_count_probe_ops = {
9254 .func = ftrace_count_snapshot,
9255 .print = ftrace_snapshot_print,
9256 .init = ftrace_snapshot_init,
9257 .free = ftrace_snapshot_free,
9258 };
9259
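/*
 * ftrace_trace_snapshot_callback() below parses the "snapshot" command
 * written to set_ftrace_filter. Usage sketch (paths relative to tracefs):
 *
 *	echo 'do_fault:snapshot' > set_ftrace_filter
 *	echo 'do_trap:snapshot:5' > set_ftrace_filter
 *
 * An optional ":count" limits how many snapshots the probe may take, and a
 * leading '!' unregisters a previously installed probe.
 */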
9260 static int
9261 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
9262 char *glob, char *cmd, char *param, int enable)
9263 {
9264 struct ftrace_probe_ops *ops;
9265 void *count = (void *)-1;
9266 char *number;
9267 int ret;
9268
9269 if (!tr)
9270 return -ENODEV;
9271
9272 /* hash funcs only work with set_ftrace_filter */
9273 if (!enable)
9274 return -EINVAL;
9275
9276 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
9277
9278 if (glob[0] == '!') {
9279 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
9280 if (!ret)
9281 tracing_disarm_snapshot(tr);
9282
9283 return ret;
9284 }
9285
9286 if (!param)
9287 goto out_reg;
9288
9289 	number = strsep(&param, ":");
9290
9291 if (!strlen(number))
9292 goto out_reg;
9293
9294 /*
9295 * We use the callback data field (which is a pointer)
9296 * as our counter.
9297 */
9298 ret = kstrtoul(number, 0, (unsigned long *)&count);
9299 if (ret)
9300 return ret;
9301
9302 out_reg:
9303 ret = tracing_arm_snapshot(tr);
9304 if (ret < 0)
9305 return ret;
9306
9307 ret = register_ftrace_function_probe(glob, tr, ops, count);
9308 if (ret < 0)
9309 tracing_disarm_snapshot(tr);
9310
9311 return ret < 0 ? ret : 0;
9312 }
9313
9314 static struct ftrace_func_command ftrace_snapshot_cmd = {
9315 .name = "snapshot",
9316 .func = ftrace_trace_snapshot_callback,
9317 };
9318
9319 static __init int register_snapshot_cmd(void)
9320 {
9321 return register_ftrace_command(&ftrace_snapshot_cmd);
9322 }
9323 #else
9324 static inline __init int register_snapshot_cmd(void) { return 0; }
9325 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
9326
9327 static struct dentry *tracing_get_dentry(struct trace_array *tr)
9328 {
9329 /* Top directory uses NULL as the parent */
9330 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
9331 return NULL;
9332
9333 if (WARN_ON(!tr->dir))
9334 return ERR_PTR(-ENODEV);
9335
9336 /* All sub buffers have a descriptor */
9337 return tr->dir;
9338 }
9339
9340 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
9341 {
9342 struct dentry *d_tracer;
9343
9344 if (tr->percpu_dir)
9345 return tr->percpu_dir;
9346
9347 d_tracer = tracing_get_dentry(tr);
9348 if (IS_ERR(d_tracer))
9349 return NULL;
9350
9351 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9352
9353 MEM_FAIL(!tr->percpu_dir,
9354 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9355
9356 return tr->percpu_dir;
9357 }
9358
9359 static struct dentry *
9360 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9361 void *data, long cpu, const struct file_operations *fops)
9362 {
9363 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9364
9365 if (ret) /* See tracing_get_cpu() */
9366 d_inode(ret)->i_cdev = (void *)(cpu + 1);
9367 return ret;
9368 }
9369
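/*
 * tracing_init_tracefs_percpu() creates the per-CPU control files for @cpu.
 * Sketch of the resulting layout under an instance directory:
 *
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 */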
9370 static void
9371 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9372 {
9373 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9374 struct dentry *d_cpu;
9375 char cpu_dir[30]; /* 30 characters should be more than enough */
9376
9377 if (!d_percpu)
9378 return;
9379
9380 snprintf(cpu_dir, 30, "cpu%ld", cpu);
9381 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9382 if (!d_cpu) {
9383 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9384 return;
9385 }
9386
9387 /* per cpu trace_pipe */
9388 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9389 tr, cpu, &tracing_pipe_fops);
9390
9391 /* per cpu trace */
9392 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9393 tr, cpu, &tracing_fops);
9394
9395 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9396 tr, cpu, &tracing_buffers_fops);
9397
9398 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9399 tr, cpu, &tracing_stats_fops);
9400
9401 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9402 tr, cpu, &tracing_entries_fops);
9403
9404 if (tr->range_addr_start)
9405 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9406 tr, cpu, &tracing_buffer_meta_fops);
9407 #ifdef CONFIG_TRACER_SNAPSHOT
9408 if (!tr->range_addr_start) {
9409 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9410 tr, cpu, &snapshot_fops);
9411
9412 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9413 tr, cpu, &snapshot_raw_fops);
9414 }
9415 #endif
9416 }
9417
9418 #ifdef CONFIG_FTRACE_SELFTEST
9419 /* Let selftest have access to static functions in this file */
9420 #include "trace_selftest.c"
9421 #endif
9422
9423 static ssize_t
9424 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9425 loff_t *ppos)
9426 {
9427 struct trace_option_dentry *topt = filp->private_data;
9428 char *buf;
9429
9430 if (topt->flags->val & topt->opt->bit)
9431 buf = "1\n";
9432 else
9433 buf = "0\n";
9434
9435 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9436 }
9437
9438 static ssize_t
9439 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9440 loff_t *ppos)
9441 {
9442 struct trace_option_dentry *topt = filp->private_data;
9443 unsigned long val;
9444 int ret;
9445
9446 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9447 if (ret)
9448 return ret;
9449
9450 if (val != 0 && val != 1)
9451 return -EINVAL;
9452
9453 if (!!(topt->flags->val & topt->opt->bit) != val) {
9454 guard(mutex)(&trace_types_lock);
9455 ret = __set_tracer_option(topt->tr, topt->flags,
9456 topt->opt, !val);
9457 if (ret)
9458 return ret;
9459 }
9460
9461 *ppos += cnt;
9462
9463 return cnt;
9464 }
9465
9466 static int tracing_open_options(struct inode *inode, struct file *filp)
9467 {
9468 struct trace_option_dentry *topt = inode->i_private;
9469 int ret;
9470
9471 ret = tracing_check_open_get_tr(topt->tr);
9472 if (ret)
9473 return ret;
9474
9475 filp->private_data = inode->i_private;
9476 return 0;
9477 }
9478
9479 static int tracing_release_options(struct inode *inode, struct file *file)
9480 {
9481 struct trace_option_dentry *topt = file->private_data;
9482
9483 trace_array_put(topt->tr);
9484 return 0;
9485 }
9486
9487 static const struct file_operations trace_options_fops = {
9488 .open = tracing_open_options,
9489 .read = trace_options_read,
9490 .write = trace_options_write,
9491 .llseek = generic_file_llseek,
9492 .release = tracing_release_options,
9493 };
9494
9495 /*
9496 * In order to pass in both the trace_array descriptor as well as the index
9497 * to the flag that the trace option file represents, the trace_array
9498 * has a character array of trace_flags_index[], which holds the index
9499 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9500 * The address of this character array is passed to the flag option file
9501 * read/write callbacks.
9502 *
9503 * In order to extract both the index and the trace_array descriptor,
9504 * get_tr_index() uses the following algorithm.
9505 *
9506 * idx = *ptr;
9507 *
9508 * As the pointer points at its own slot in trace_flags_index[],
9509 * dereferencing it yields the index value (remember index[1] == 1).
9510 *
9511 * Then, to get the trace_array descriptor, subtract that index
9512 * from the pointer to land at the start of the index array:
9513 *
9514 * ptr - idx == &index[0]
9515 *
9516 * Then a simple container_of() from that pointer gets us to the
9517 * trace_array descriptor.
9518 */
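/*
 * Worked example (illustrative): if the option file was created with
 * &tr->trace_flags_index[3], then *data == 3, data - 3 points at
 * &tr->trace_flags_index[0], and container_of() on that address with
 * member trace_flags_index recovers the enclosing trace_array.
 */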
9519 static void get_tr_index(void *data, struct trace_array **ptr,
9520 unsigned int *pindex)
9521 {
9522 *pindex = *(unsigned char *)data;
9523
9524 *ptr = container_of(data - *pindex, struct trace_array,
9525 trace_flags_index);
9526 }
9527
9528 static ssize_t
9529 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9530 loff_t *ppos)
9531 {
9532 void *tr_index = filp->private_data;
9533 struct trace_array *tr;
9534 unsigned int index;
9535 char *buf;
9536
9537 get_tr_index(tr_index, &tr, &index);
9538
9539 if (tr->trace_flags & (1ULL << index))
9540 buf = "1\n";
9541 else
9542 buf = "0\n";
9543
9544 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9545 }
9546
9547 static ssize_t
9548 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9549 loff_t *ppos)
9550 {
9551 void *tr_index = filp->private_data;
9552 struct trace_array *tr;
9553 unsigned int index;
9554 unsigned long val;
9555 int ret;
9556
9557 get_tr_index(tr_index, &tr, &index);
9558
9559 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9560 if (ret)
9561 return ret;
9562
9563 if (val != 0 && val != 1)
9564 return -EINVAL;
9565
9566 mutex_lock(&event_mutex);
9567 mutex_lock(&trace_types_lock);
9568 ret = set_tracer_flag(tr, 1ULL << index, val);
9569 mutex_unlock(&trace_types_lock);
9570 mutex_unlock(&event_mutex);
9571
9572 if (ret < 0)
9573 return ret;
9574
9575 *ppos += cnt;
9576
9577 return cnt;
9578 }
9579
9580 static const struct file_operations trace_options_core_fops = {
9581 .open = tracing_open_generic,
9582 .read = trace_options_core_read,
9583 .write = trace_options_core_write,
9584 .llseek = generic_file_llseek,
9585 };
9586
9587 struct dentry *trace_create_file(const char *name,
9588 umode_t mode,
9589 struct dentry *parent,
9590 void *data,
9591 const struct file_operations *fops)
9592 {
9593 struct dentry *ret;
9594
9595 ret = tracefs_create_file(name, mode, parent, data, fops);
9596 if (!ret)
9597 pr_warn("Could not create tracefs '%s' entry\n", name);
9598
9599 return ret;
9600 }
9601
9602
9603 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9604 {
9605 struct dentry *d_tracer;
9606
9607 if (tr->options)
9608 return tr->options;
9609
9610 d_tracer = tracing_get_dentry(tr);
9611 if (IS_ERR(d_tracer))
9612 return NULL;
9613
9614 tr->options = tracefs_create_dir("options", d_tracer);
9615 if (!tr->options) {
9616 pr_warn("Could not create tracefs directory 'options'\n");
9617 return NULL;
9618 }
9619
9620 return tr->options;
9621 }
9622
9623 static void
9624 create_trace_option_file(struct trace_array *tr,
9625 struct trace_option_dentry *topt,
9626 struct tracer_flags *flags,
9627 struct tracer_opt *opt)
9628 {
9629 struct dentry *t_options;
9630
9631 t_options = trace_options_init_dentry(tr);
9632 if (!t_options)
9633 return;
9634
9635 topt->flags = flags;
9636 topt->opt = opt;
9637 topt->tr = tr;
9638
9639 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9640 t_options, topt, &trace_options_fops);
9641 }
9642
9643 static int
9644 create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
9645 struct tracer_flags *flags)
9646 {
9647 struct trace_option_dentry *topts;
9648 struct trace_options *tr_topts;
9649 struct tracer_opt *opts;
9650 int cnt;
9651
9652 if (!flags || !flags->opts)
9653 return 0;
9654
9655 opts = flags->opts;
9656
9657 for (cnt = 0; opts[cnt].name; cnt++)
9658 ;
9659
9660 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9661 if (!topts)
9662 return 0;
9663
9664 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9665 GFP_KERNEL);
9666 if (!tr_topts) {
9667 kfree(topts);
9668 return -ENOMEM;
9669 }
9670
9671 tr->topts = tr_topts;
9672 tr->topts[tr->nr_topts].tracer = tracer;
9673 tr->topts[tr->nr_topts].topts = topts;
9674 tr->nr_topts++;
9675
9676 for (cnt = 0; opts[cnt].name; cnt++) {
9677 create_trace_option_file(tr, &topts[cnt], flags,
9678 &opts[cnt]);
9679 MEM_FAIL(topts[cnt].entry == NULL,
9680 "Failed to create trace option: %s",
9681 opts[cnt].name);
9682 }
9683 return 0;
9684 }
9685
9686 static int get_global_flags_val(struct tracer *tracer)
9687 {
9688 struct tracers *t;
9689
9690 list_for_each_entry(t, &global_trace.tracers, list) {
9691 if (t->tracer != tracer)
9692 continue;
9693 if (!t->flags)
9694 return -1;
9695 return t->flags->val;
9696 }
9697 return -1;
9698 }
9699
9700 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
9701 {
9702 struct tracer *tracer = t->tracer;
9703 struct tracer_flags *flags = t->flags ?: tracer->flags;
9704
9705 if (!flags)
9706 return 0;
9707
9708 	/* Only add tracer options after update_tracer_options() has finished */
9709 if (!tracer_options_updated)
9710 return 0;
9711
9712 return create_trace_option_files(tr, tracer, flags);
9713 }
9714
9715 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
9716 {
9717 struct tracer_flags *flags;
9718 struct tracers *t;
9719 int ret;
9720
9721 /* Only enable if the directory has been created already. */
9722 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
9723 return 0;
9724
9725 /*
9726 * If this is an instance, only create flags for tracers
9727 * the instance may have.
9728 */
9729 if (!trace_ok_for_array(tracer, tr))
9730 return 0;
9731
9732 t = kmalloc(sizeof(*t), GFP_KERNEL);
9733 if (!t)
9734 return -ENOMEM;
9735
9736 t->tracer = tracer;
9737 t->flags = NULL;
9738 list_add(&t->list, &tr->tracers);
9739
9740 flags = tracer->flags;
9741 if (!flags) {
9742 if (!tracer->default_flags)
9743 return 0;
9744
9745 /*
9746 * If the tracer defines default flags, it means the flags are
9747 * per trace instance.
9748 */
9749 flags = kmalloc(sizeof(*flags), GFP_KERNEL);
9750 if (!flags)
9751 return -ENOMEM;
9752
9753 *flags = *tracer->default_flags;
9754 flags->trace = tracer;
9755
9756 t->flags = flags;
9757
9758 /* If this is an instance, inherit the global_trace flags */
9759 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
9760 int val = get_global_flags_val(tracer);
9761 if (!WARN_ON_ONCE(val < 0))
9762 flags->val = val;
9763 }
9764 }
9765
9766 ret = add_tracer_options(tr, t);
9767 if (ret < 0) {
9768 list_del(&t->list);
9769 kfree(t->flags);
9770 kfree(t);
9771 }
9772
9773 return ret;
9774 }
9775
9776 static struct dentry *
9777 create_trace_option_core_file(struct trace_array *tr,
9778 const char *option, long index)
9779 {
9780 struct dentry *t_options;
9781
9782 t_options = trace_options_init_dentry(tr);
9783 if (!t_options)
9784 return NULL;
9785
9786 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9787 (void *)&tr->trace_flags_index[index],
9788 &trace_options_core_fops);
9789 }
9790
9791 static void create_trace_options_dir(struct trace_array *tr)
9792 {
9793 struct dentry *t_options;
9794 bool top_level = tr == &global_trace;
9795 int i;
9796
9797 t_options = trace_options_init_dentry(tr);
9798 if (!t_options)
9799 return;
9800
9801 for (i = 0; trace_options[i]; i++) {
9802 if (top_level ||
9803 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
9804 create_trace_option_core_file(tr, trace_options[i], i);
9805 }
9806 }
9807 }
9808
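/*
 * Callbacks backing the per-instance "tracing_on" file: reading reports
 * whether the ring buffer is currently recording; writing 0 or 1 stops or
 * (re)starts recording and invokes the current tracer's stop()/start()
 * hooks accordingly.
 */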
9809 static ssize_t
9810 rb_simple_read(struct file *filp, char __user *ubuf,
9811 size_t cnt, loff_t *ppos)
9812 {
9813 struct trace_array *tr = filp->private_data;
9814 char buf[64];
9815 int r;
9816
9817 r = tracer_tracing_is_on(tr);
9818 r = sprintf(buf, "%d\n", r);
9819
9820 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9821 }
9822
9823 static ssize_t
9824 rb_simple_write(struct file *filp, const char __user *ubuf,
9825 size_t cnt, loff_t *ppos)
9826 {
9827 struct trace_array *tr = filp->private_data;
9828 struct trace_buffer *buffer = tr->array_buffer.buffer;
9829 unsigned long val;
9830 int ret;
9831
9832 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9833 if (ret)
9834 return ret;
9835
9836 if (buffer) {
9837 guard(mutex)(&trace_types_lock);
9838 if (!!val == tracer_tracing_is_on(tr)) {
9839 val = 0; /* do nothing */
9840 } else if (val) {
9841 tracer_tracing_on(tr);
9842 if (tr->current_trace->start)
9843 tr->current_trace->start(tr);
9844 } else {
9845 tracer_tracing_off(tr);
9846 if (tr->current_trace->stop)
9847 tr->current_trace->stop(tr);
9848 /* Wake up any waiters */
9849 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9850 }
9851 }
9852
9853 (*ppos)++;
9854
9855 return cnt;
9856 }
9857
9858 static const struct file_operations rb_simple_fops = {
9859 .open = tracing_open_generic_tr,
9860 .read = rb_simple_read,
9861 .write = rb_simple_write,
9862 .release = tracing_release_generic_tr,
9863 .llseek = default_llseek,
9864 };
9865
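/*
 * "buffer_percent" is the watermark for waking readers blocked on the ring
 * buffer: 0 wakes them on any new data, 50 when the buffer is half full,
 * 100 only when it is full. The write handler below accepts 0-100 only.
 */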
9866 static ssize_t
9867 buffer_percent_read(struct file *filp, char __user *ubuf,
9868 size_t cnt, loff_t *ppos)
9869 {
9870 struct trace_array *tr = filp->private_data;
9871 char buf[64];
9872 int r;
9873
9874 r = tr->buffer_percent;
9875 r = sprintf(buf, "%d\n", r);
9876
9877 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9878 }
9879
9880 static ssize_t
9881 buffer_percent_write(struct file *filp, const char __user *ubuf,
9882 size_t cnt, loff_t *ppos)
9883 {
9884 struct trace_array *tr = filp->private_data;
9885 unsigned long val;
9886 int ret;
9887
9888 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9889 if (ret)
9890 return ret;
9891
9892 if (val > 100)
9893 return -EINVAL;
9894
9895 tr->buffer_percent = val;
9896
9897 (*ppos)++;
9898
9899 return cnt;
9900 }
9901
9902 static const struct file_operations buffer_percent_fops = {
9903 .open = tracing_open_generic_tr,
9904 .read = buffer_percent_read,
9905 .write = buffer_percent_write,
9906 .release = tracing_release_generic_tr,
9907 .llseek = default_llseek,
9908 };
9909
9910 static ssize_t
9911 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9912 {
9913 struct trace_array *tr = filp->private_data;
9914 size_t size;
9915 char buf[64];
9916 int order;
9917 int r;
9918
9919 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9920 size = (PAGE_SIZE << order) / 1024;
9921
9922 r = sprintf(buf, "%zd\n", size);
9923
9924 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9925 }
9926
9927 static ssize_t
9928 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9929 size_t cnt, loff_t *ppos)
9930 {
9931 struct trace_array *tr = filp->private_data;
9932 unsigned long val;
9933 int old_order;
9934 int order;
9935 int pages;
9936 int ret;
9937
9938 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9939 if (ret)
9940 return ret;
9941
9942 val *= 1024; /* value passed in is in KB */
9943
9944 pages = DIV_ROUND_UP(val, PAGE_SIZE);
9945 order = fls(pages - 1);
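	/*
	 * Worked example: with 4K pages, writing 8 (KB) gives val = 8192,
	 * pages = 2, order = 1, i.e. 8K sub-buffers; writing 4 gives
	 * pages = 1, order = 0, the single-page default.
	 */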
9946
9947 /* limit between 1 and 128 system pages */
9948 if (order < 0 || order > 7)
9949 return -EINVAL;
9950
9951 /* Do not allow tracing while changing the order of the ring buffer */
9952 tracing_stop_tr(tr);
9953
9954 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9955 if (old_order == order)
9956 goto out;
9957
9958 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9959 if (ret)
9960 goto out;
9961
9962 #ifdef CONFIG_TRACER_MAX_TRACE
9963
9964 if (!tr->allocated_snapshot)
9965 goto out_max;
9966
9967 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9968 if (ret) {
9969 /* Put back the old order */
9970 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9971 if (WARN_ON_ONCE(cnt)) {
9972 /*
9973 * AARGH! We are left with different orders!
9974 * The max buffer is our "snapshot" buffer.
9975 * When a tracer needs a snapshot (one of the
9976 * latency tracers), it swaps the max buffer
9977 * with the saved snapshot. We succeeded in updating
9978 * the order of the main buffer, but failed to
9979 * update the order of the max buffer. And when we tried
9980 * to reset the main buffer to its original order, that
9981 * failed too. This is very unlikely to
9982 * happen, but if it does, warn and kill all
9983 * tracing.
9984 */
9985 tracing_disabled = 1;
9986 }
9987 goto out;
9988 }
9989 out_max:
9990 #endif
9991 (*ppos)++;
9992 out:
9993 if (ret)
9994 cnt = ret;
9995 tracing_start_tr(tr);
9996 return cnt;
9997 }
9998
9999 static const struct file_operations buffer_subbuf_size_fops = {
10000 .open = tracing_open_generic_tr,
10001 .read = buffer_subbuf_size_read,
10002 .write = buffer_subbuf_size_write,
10003 .release = tracing_release_generic_tr,
10004 .llseek = default_llseek,
10005 };
10006
10007 static struct dentry *trace_instance_dir;
10008
10009 static void
10010 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
10011
10012 #ifdef CONFIG_MODULES
10013 static int make_mod_delta(struct module *mod, void *data)
10014 {
10015 struct trace_module_delta *module_delta;
10016 struct trace_scratch *tscratch;
10017 struct trace_mod_entry *entry;
10018 struct trace_array *tr = data;
10019 int i;
10020
10021 tscratch = tr->scratch;
10022 module_delta = READ_ONCE(tr->module_delta);
10023 for (i = 0; i < tscratch->nr_entries; i++) {
10024 entry = &tscratch->entries[i];
10025 if (strcmp(mod->name, entry->mod_name))
10026 continue;
10027 if (mod->state == MODULE_STATE_GOING)
10028 module_delta->delta[i] = 0;
10029 else
10030 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
10031 - entry->mod_addr;
10032 break;
10033 }
10034 return 0;
10035 }
10036 #else
10037 static int make_mod_delta(struct module *mod, void *data)
10038 {
10039 return 0;
10040 }
10041 #endif
10042
10043 static int mod_addr_comp(const void *a, const void *b, const void *data)
10044 {
10045 const struct trace_mod_entry *e1 = a;
10046 const struct trace_mod_entry *e2 = b;
10047
10048 return e1->mod_addr > e2->mod_addr ? 1 : -1;
10049 }
10050
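/*
 * Validate and adopt the scratch area of a persistent (previous boot) ring
 * buffer: check that the recorded module names are sane, sort the module
 * entries by address, allocate the per-module text deltas, and restore the
 * trace clock used during the previous boot. Any inconsistency wipes the
 * scratch data.
 */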
10051 static void setup_trace_scratch(struct trace_array *tr,
10052 struct trace_scratch *tscratch, unsigned int size)
10053 {
10054 struct trace_module_delta *module_delta;
10055 struct trace_mod_entry *entry;
10056 int i, nr_entries;
10057
10058 if (!tscratch)
10059 return;
10060
10061 tr->scratch = tscratch;
10062 tr->scratch_size = size;
10063
10064 if (tscratch->text_addr)
10065 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
10066
10067 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
10068 goto reset;
10069
10070 /* Check if each module name is a valid string */
10071 for (i = 0; i < tscratch->nr_entries; i++) {
10072 int n;
10073
10074 entry = &tscratch->entries[i];
10075
10076 for (n = 0; n < MODULE_NAME_LEN; n++) {
10077 if (entry->mod_name[n] == '\0')
10078 break;
10079 if (!isprint(entry->mod_name[n]))
10080 goto reset;
10081 }
10082 if (n == MODULE_NAME_LEN)
10083 goto reset;
10084 }
10085
10086 /* Sort the entries so that we can find appropriate module from address. */
10087 nr_entries = tscratch->nr_entries;
10088 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
10089 mod_addr_comp, NULL, NULL);
10090
10091 if (IS_ENABLED(CONFIG_MODULES)) {
10092 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
10093 if (!module_delta) {
10094 pr_info("module_delta allocation failed. Not able to decode module address.");
10095 goto reset;
10096 }
10097 init_rcu_head(&module_delta->rcu);
10098 } else
10099 module_delta = NULL;
10100 WRITE_ONCE(tr->module_delta, module_delta);
10101
10102 /* Scan modules to make text delta for modules. */
10103 module_for_each_mod(make_mod_delta, tr);
10104
10105 	/* Set trace_clock to the same as the previous boot. */
10106 if (tscratch->clock_id != tr->clock_id) {
10107 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
10108 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
10109 pr_info("the previous trace_clock info is not valid.");
10110 goto reset;
10111 }
10112 }
10113 return;
10114 reset:
10115 /* Invalid trace modules */
10116 memset(tscratch, 0, size);
10117 }
10118
10119 static int
10120 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
10121 {
10122 enum ring_buffer_flags rb_flags;
10123 struct trace_scratch *tscratch;
10124 unsigned int scratch_size = 0;
10125
10126 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;
10127
10128 buf->tr = tr;
10129
10130 if (tr->range_addr_start && tr->range_addr_size) {
10131 /* Add scratch buffer to handle 128 modules */
10132 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
10133 tr->range_addr_start,
10134 tr->range_addr_size,
10135 struct_size(tscratch, entries, 128));
10136
10137 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
10138 setup_trace_scratch(tr, tscratch, scratch_size);
10139
10140 /*
10141 * This is basically the same as a mapped buffer,
10142 * with the same restrictions.
10143 */
10144 tr->mapped++;
10145 } else {
10146 buf->buffer = ring_buffer_alloc(size, rb_flags);
10147 }
10148 if (!buf->buffer)
10149 return -ENOMEM;
10150
10151 buf->data = alloc_percpu(struct trace_array_cpu);
10152 if (!buf->data) {
10153 ring_buffer_free(buf->buffer);
10154 buf->buffer = NULL;
10155 return -ENOMEM;
10156 }
10157
10158 /* Allocate the first page for all buffers */
10159 set_buffer_entries(&tr->array_buffer,
10160 ring_buffer_size(tr->array_buffer.buffer, 0));
10161
10162 return 0;
10163 }
10164
10165 static void free_trace_buffer(struct array_buffer *buf)
10166 {
10167 if (buf->buffer) {
10168 ring_buffer_free(buf->buffer);
10169 buf->buffer = NULL;
10170 free_percpu(buf->data);
10171 buf->data = NULL;
10172 }
10173 }
10174
10175 static int allocate_trace_buffers(struct trace_array *tr, int size)
10176 {
10177 int ret;
10178
10179 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
10180 if (ret)
10181 return ret;
10182
10183 #ifdef CONFIG_TRACER_MAX_TRACE
10184 	/* Fixed memory-mapped buffer trace arrays do not have snapshot buffers */
10185 if (tr->range_addr_start)
10186 return 0;
10187
10188 ret = allocate_trace_buffer(tr, &tr->max_buffer,
10189 allocate_snapshot ? size : 1);
10190 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
10191 free_trace_buffer(&tr->array_buffer);
10192 return -ENOMEM;
10193 }
10194 tr->allocated_snapshot = allocate_snapshot;
10195
10196 allocate_snapshot = false;
10197 #endif
10198
10199 return 0;
10200 }
10201
10202 static void free_trace_buffers(struct trace_array *tr)
10203 {
10204 if (!tr)
10205 return;
10206
10207 free_trace_buffer(&tr->array_buffer);
10208 kfree(tr->module_delta);
10209
10210 #ifdef CONFIG_TRACER_MAX_TRACE
10211 free_trace_buffer(&tr->max_buffer);
10212 #endif
10213 }
10214
10215 static void init_trace_flags_index(struct trace_array *tr)
10216 {
10217 int i;
10218
10219 /* Used by the trace options files */
10220 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
10221 tr->trace_flags_index[i] = i;
10222 }
10223
10224 static int __update_tracer(struct trace_array *tr)
10225 {
10226 struct tracer *t;
10227 int ret = 0;
10228
10229 for (t = trace_types; t && !ret; t = t->next)
10230 ret = add_tracer(tr, t);
10231
10232 return ret;
10233 }
10234
10235 static __init int __update_tracer_options(struct trace_array *tr)
10236 {
10237 struct tracers *t;
10238 int ret = 0;
10239
10240 list_for_each_entry(t, &tr->tracers, list) {
10241 ret = add_tracer_options(tr, t);
10242 if (ret < 0)
10243 break;
10244 }
10245
10246 return ret;
10247 }
10248
10249 static __init void update_tracer_options(void)
10250 {
10251 struct trace_array *tr;
10252
10253 guard(mutex)(&trace_types_lock);
10254 tracer_options_updated = true;
10255 list_for_each_entry(tr, &ftrace_trace_arrays, list)
10256 __update_tracer_options(tr);
10257 }
10258
10259 /* Must have trace_types_lock held */
10260 struct trace_array *trace_array_find(const char *instance)
10261 {
10262 struct trace_array *tr, *found = NULL;
10263
10264 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10265 if (tr->name && strcmp(tr->name, instance) == 0) {
10266 found = tr;
10267 break;
10268 }
10269 }
10270
10271 return found;
10272 }
10273
10274 struct trace_array *trace_array_find_get(const char *instance)
10275 {
10276 struct trace_array *tr;
10277
10278 guard(mutex)(&trace_types_lock);
10279 tr = trace_array_find(instance);
10280 if (tr)
10281 tr->ref++;
10282
10283 return tr;
10284 }
10285
10286 static int trace_array_create_dir(struct trace_array *tr)
10287 {
10288 int ret;
10289
10290 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
10291 if (!tr->dir)
10292 return -EINVAL;
10293
10294 ret = event_trace_add_tracer(tr->dir, tr);
10295 if (ret) {
10296 tracefs_remove(tr->dir);
10297 return ret;
10298 }
10299
10300 init_tracer_tracefs(tr, tr->dir);
10301 ret = __update_tracer(tr);
10302 if (ret) {
10303 event_trace_del_tracer(tr);
10304 tracefs_remove(tr->dir);
10305 return ret;
10306 }
10307 return 0;
10308 }
10309
10310 static struct trace_array *
10311 trace_array_create_systems(const char *name, const char *systems,
10312 unsigned long range_addr_start,
10313 unsigned long range_addr_size)
10314 {
10315 struct trace_array *tr;
10316 int ret;
10317
10318 ret = -ENOMEM;
10319 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
10320 if (!tr)
10321 return ERR_PTR(ret);
10322
10323 tr->name = kstrdup(name, GFP_KERNEL);
10324 if (!tr->name)
10325 goto out_free_tr;
10326
10327 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
10328 goto out_free_tr;
10329
10330 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
10331 goto out_free_tr;
10332
10333 if (systems) {
10334 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
10335 if (!tr->system_names)
10336 goto out_free_tr;
10337 }
10338
10339 /* Only for boot up memory mapped ring buffers */
10340 tr->range_addr_start = range_addr_start;
10341 tr->range_addr_size = range_addr_size;
10342
10343 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
10344
10345 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
10346
10347 raw_spin_lock_init(&tr->start_lock);
10348
10349 tr->syscall_buf_sz = global_trace.syscall_buf_sz;
10350
10351 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10352 #ifdef CONFIG_TRACER_MAX_TRACE
10353 spin_lock_init(&tr->snapshot_trigger_lock);
10354 #endif
10355 tr->current_trace = &nop_trace;
10356 tr->current_trace_flags = nop_trace.flags;
10357
10358 INIT_LIST_HEAD(&tr->systems);
10359 INIT_LIST_HEAD(&tr->events);
10360 INIT_LIST_HEAD(&tr->hist_vars);
10361 INIT_LIST_HEAD(&tr->err_log);
10362 INIT_LIST_HEAD(&tr->tracers);
10363 INIT_LIST_HEAD(&tr->marker_list);
10364
10365 #ifdef CONFIG_MODULES
10366 INIT_LIST_HEAD(&tr->mod_events);
10367 #endif
10368
10369 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
10370 goto out_free_tr;
10371
10372 	/* The ring buffer is expanded by default */
10373 trace_set_ring_buffer_expanded(tr);
10374
10375 if (ftrace_allocate_ftrace_ops(tr) < 0)
10376 goto out_free_tr;
10377
10378 ftrace_init_trace_array(tr);
10379
10380 init_trace_flags_index(tr);
10381
10382 if (trace_instance_dir) {
10383 ret = trace_array_create_dir(tr);
10384 if (ret)
10385 goto out_free_tr;
10386 } else
10387 __trace_early_add_events(tr);
10388
10389 list_add(&tr->list, &ftrace_trace_arrays);
10390
10391 tr->ref++;
10392
10393 return tr;
10394
10395 out_free_tr:
10396 ftrace_free_ftrace_ops(tr);
10397 free_trace_buffers(tr);
10398 free_cpumask_var(tr->pipe_cpumask);
10399 free_cpumask_var(tr->tracing_cpumask);
10400 kfree_const(tr->system_names);
10401 kfree(tr->range_name);
10402 kfree(tr->name);
10403 kfree(tr);
10404
10405 return ERR_PTR(ret);
10406 }
10407
10408 static struct trace_array *trace_array_create(const char *name)
10409 {
10410 return trace_array_create_systems(name, NULL, 0, 0);
10411 }
10412
10413 static int instance_mkdir(const char *name)
10414 {
10415 struct trace_array *tr;
10416 int ret;
10417
10418 guard(mutex)(&event_mutex);
10419 guard(mutex)(&trace_types_lock);
10420
10421 ret = -EEXIST;
10422 if (trace_array_find(name))
10423 return -EEXIST;
10424
10425 tr = trace_array_create(name);
10426
10427 ret = PTR_ERR_OR_ZERO(tr);
10428
10429 return ret;
10430 }
10431
10432 #ifdef CONFIG_MMU
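/*
 * Map a reserved physical range (used as the backing store of a boot-mapped
 * persistent ring buffer) into vmalloc space and return the new virtual
 * start address, or 0 on failure.
 */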
10433 static u64 map_pages(unsigned long start, unsigned long size)
10434 {
10435 unsigned long vmap_start, vmap_end;
10436 struct vm_struct *area;
10437 int ret;
10438
10439 area = get_vm_area(size, VM_IOREMAP);
10440 if (!area)
10441 return 0;
10442
10443 vmap_start = (unsigned long) area->addr;
10444 vmap_end = vmap_start + size;
10445
10446 ret = vmap_page_range(vmap_start, vmap_end,
10447 start, pgprot_nx(PAGE_KERNEL));
10448 if (ret < 0) {
10449 free_vm_area(area);
10450 return 0;
10451 }
10452
10453 return (u64)vmap_start;
10454 }
10455 #else
10456 static inline u64 map_pages(unsigned long start, unsigned long size)
10457 {
10458 return 0;
10459 }
10460 #endif
10461
10462 /**
10463 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10464 * @name: The name of the trace array to be looked up/created.
10465 * @systems: A list of systems to create event directories for (NULL for all)
10466 *
10467 * Returns a pointer to the trace array with the given name, or
10468 * NULL if it cannot be created.
10469 *
10470 * NOTE: This function increments the reference counter associated with the
10471 * trace array returned. This makes sure it cannot be freed while in use.
10472 * Use trace_array_put() once the trace array is no longer needed.
10473 * If the trace_array is to be freed, trace_array_destroy() needs to
10474 * be called after the trace_array_put(), or simply let user space delete
10475 * it from the tracefs instances directory. But until the
10476 * trace_array_put() is called, user space can not delete it.
10477 *
10478 */
10479 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10480 {
10481 struct trace_array *tr;
10482
10483 guard(mutex)(&event_mutex);
10484 guard(mutex)(&trace_types_lock);
10485
10486 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10487 if (tr->name && strcmp(tr->name, name) == 0) {
10488 tr->ref++;
10489 return tr;
10490 }
10491 }
10492
10493 tr = trace_array_create_systems(name, systems, 0, 0);
10494
10495 if (IS_ERR(tr))
10496 tr = NULL;
10497 else
10498 tr->ref++;
10499
10500 return tr;
10501 }
10502 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
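/*
 * Typical module usage (illustrative sketch; the instance name is only an
 * example):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */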
10503
10504 static int __remove_instance(struct trace_array *tr)
10505 {
10506 int i;
10507
10508 /* Reference counter for a newly created trace array = 1. */
10509 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10510 return -EBUSY;
10511
10512 list_del(&tr->list);
10513
10514 /* Disable all the flags that were enabled coming in */
10515 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10516 if ((1ULL << i) & ZEROED_TRACE_FLAGS)
10517 set_tracer_flag(tr, 1ULL << i, 0);
10518 }
10519
10520 if (printk_trace == tr)
10521 update_printk_trace(&global_trace);
10522
10523 if (update_marker_trace(tr, 0))
10524 synchronize_rcu();
10525
10526 tracing_set_nop(tr);
10527 clear_ftrace_function_probes(tr);
10528 event_trace_del_tracer(tr);
10529 ftrace_clear_pids(tr);
10530 ftrace_destroy_function_files(tr);
10531 tracefs_remove(tr->dir);
10532 free_percpu(tr->last_func_repeats);
10533 free_trace_buffers(tr);
10534 clear_tracing_err_log(tr);
10535 free_tracers(tr);
10536
10537 if (tr->range_name) {
10538 reserve_mem_release_by_name(tr->range_name);
10539 kfree(tr->range_name);
10540 }
10541 if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
10542 vfree((void *)tr->range_addr_start);
10543
10544 for (i = 0; i < tr->nr_topts; i++) {
10545 kfree(tr->topts[i].topts);
10546 }
10547 kfree(tr->topts);
10548
10549 free_cpumask_var(tr->pipe_cpumask);
10550 free_cpumask_var(tr->tracing_cpumask);
10551 kfree_const(tr->system_names);
10552 kfree(tr->name);
10553 kfree(tr);
10554
10555 return 0;
10556 }
10557
10558 int trace_array_destroy(struct trace_array *this_tr)
10559 {
10560 struct trace_array *tr;
10561
10562 if (!this_tr)
10563 return -EINVAL;
10564
10565 guard(mutex)(&event_mutex);
10566 guard(mutex)(&trace_types_lock);
10567
10568
10569 	/* Make sure the trace array exists before destroying it. */
10570 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10571 if (tr == this_tr)
10572 return __remove_instance(tr);
10573 }
10574
10575 return -ENODEV;
10576 }
10577 EXPORT_SYMBOL_GPL(trace_array_destroy);
10578
10579 static int instance_rmdir(const char *name)
10580 {
10581 struct trace_array *tr;
10582
10583 guard(mutex)(&event_mutex);
10584 guard(mutex)(&trace_types_lock);
10585
10586 tr = trace_array_find(name);
10587 if (!tr)
10588 return -ENODEV;
10589
10590 return __remove_instance(tr);
10591 }
10592
10593 static __init void create_trace_instances(struct dentry *d_tracer)
10594 {
10595 struct trace_array *tr;
10596
10597 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10598 instance_mkdir,
10599 instance_rmdir);
10600 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10601 return;
10602
10603 guard(mutex)(&event_mutex);
10604 guard(mutex)(&trace_types_lock);
10605
10606 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10607 if (!tr->name)
10608 continue;
10609 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10610 "Failed to create instance directory\n"))
10611 return;
10612 }
10613 }
10614
10615 static void
10616 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10617 {
10618 int cpu;
10619
10620 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10621 tr, &show_traces_fops);
10622
10623 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10624 tr, &set_tracer_fops);
10625
10626 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10627 tr, &tracing_cpumask_fops);
10628
10629 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10630 tr, &tracing_iter_fops);
10631
10632 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10633 tr, &tracing_fops);
10634
10635 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10636 tr, &tracing_pipe_fops);
10637
10638 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10639 tr, &tracing_entries_fops);
10640
10641 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10642 tr, &tracing_total_entries_fops);
10643
10644 trace_create_file("free_buffer", 0200, d_tracer,
10645 tr, &tracing_free_buffer_fops);
10646
10647 trace_create_file("trace_marker", 0220, d_tracer,
10648 tr, &tracing_mark_fops);
10649
10650 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10651
10652 trace_create_file("trace_marker_raw", 0220, d_tracer,
10653 tr, &tracing_mark_raw_fops);
10654
10655 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10656 &trace_clock_fops);
10657
10658 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10659 tr, &rb_simple_fops);
10660
10661 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10662 &trace_time_stamp_mode_fops);
10663
10664 tr->buffer_percent = 50;
10665
10666 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10667 tr, &buffer_percent_fops);
10668
10669 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10670 tr, &buffer_subbuf_size_fops);
10671
10672 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
10673 tr, &tracing_syscall_buf_fops);
10674
10675 create_trace_options_dir(tr);
10676
10677 #ifdef CONFIG_TRACER_MAX_TRACE
10678 trace_create_maxlat_file(tr, d_tracer);
10679 #endif
10680
10681 if (ftrace_create_function_files(tr, d_tracer))
10682 MEM_FAIL(1, "Could not allocate function filter files");
10683
10684 if (tr->range_addr_start) {
10685 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10686 tr, &last_boot_fops);
10687 #ifdef CONFIG_TRACER_SNAPSHOT
10688 } else {
10689 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10690 tr, &snapshot_fops);
10691 #endif
10692 }
10693
10694 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10695 tr, &tracing_err_log_fops);
10696
10697 for_each_tracing_cpu(cpu)
10698 tracing_init_tracefs_percpu(tr, cpu);
10699
10700 ftrace_init_tracefs(tr, d_tracer);
10701 }
10702
10703 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10704 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10705 {
10706 struct vfsmount *mnt;
10707 struct file_system_type *type;
10708 struct fs_context *fc;
10709 int ret;
10710
10711 /*
10712 * To maintain backward compatibility for tools that mount
10713 * debugfs to get to the tracing facility, tracefs is automatically
10714 * mounted to the debugfs/tracing directory.
10715 */
10716 type = get_fs_type("tracefs");
10717 if (!type)
10718 return NULL;
10719
10720 fc = fs_context_for_submount(type, mntpt);
10721 put_filesystem(type);
10722 if (IS_ERR(fc))
10723 return ERR_CAST(fc);
10724
10725 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10726
10727 ret = vfs_parse_fs_string(fc, "source", "tracefs");
10728 if (!ret)
10729 mnt = fc_mount(fc);
10730 else
10731 mnt = ERR_PTR(ret);
10732
10733 put_fs_context(fc);
10734 return mnt;
10735 }
10736 #endif
10737
10738 /**
10739 * tracing_init_dentry - initialize top level trace array
10740 *
10741 * This is called when creating files or directories in the tracing
10742 * directory. It is called via fs_initcall() by any of the boot up code
10743 * and expects to return the dentry of the top level tracing directory.
10744 */
10745 int tracing_init_dentry(void)
10746 {
10747 struct trace_array *tr = &global_trace;
10748
10749 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10750 pr_warn("Tracing disabled due to lockdown\n");
10751 return -EPERM;
10752 }
10753
10754 /* The top level trace array uses NULL as parent */
10755 if (tr->dir)
10756 return 0;
10757
10758 if (WARN_ON(!tracefs_initialized()))
10759 return -ENODEV;
10760
10761 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10762 /*
10763 * As there may still be users that expect the tracing
10764 * files to exist in debugfs/tracing, we must automount
10765 * the tracefs file system there, so older tools still
10766 * work with the newer kernel.
10767 */
10768 tr->dir = debugfs_create_automount("tracing", NULL,
10769 trace_automount, NULL);
10770 #endif
10771
10772 return 0;
10773 }
10774
10775 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10776 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10777
10778 static struct workqueue_struct *eval_map_wq __initdata;
10779 static struct work_struct eval_map_work __initdata;
10780 static struct work_struct tracerfs_init_work __initdata;
10781
10782 static void __init eval_map_work_func(struct work_struct *work)
10783 {
10784 int len;
10785
10786 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10787 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10788 }
10789
10790 static int __init trace_eval_init(void)
10791 {
10792 INIT_WORK(&eval_map_work, eval_map_work_func);
10793
10794 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10795 if (!eval_map_wq) {
10796 pr_err("Unable to allocate eval_map_wq\n");
10797 /* Do work here */
10798 eval_map_work_func(&eval_map_work);
10799 return -ENOMEM;
10800 }
10801
10802 queue_work(eval_map_wq, &eval_map_work);
10803 return 0;
10804 }
10805
10806 subsys_initcall(trace_eval_init);
10807
10808 static int __init trace_eval_sync(void)
10809 {
10810 /* Make sure the eval map updates are finished */
10811 if (eval_map_wq)
10812 destroy_workqueue(eval_map_wq);
10813 return 0;
10814 }
10815
10816 late_initcall_sync(trace_eval_sync);
10817
10818
10819 #ifdef CONFIG_MODULES
10820
10821 bool module_exists(const char *module)
10822 {
10823 /* All modules have the symbol __this_module */
10824 static const char this_mod[] = "__this_module";
10825 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10826 unsigned long val;
10827 int n;
10828
10829 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10830
10831 if (n > sizeof(modname) - 1)
10832 return false;
10833
10834 val = module_kallsyms_lookup_name(modname);
10835 return val != 0;
10836 }
10837
10838 static void trace_module_add_evals(struct module *mod)
10839 {
10840 /*
10841 * Modules with bad taint do not have events created, do
10842 * not bother with enums either.
10843 */
10844 if (trace_module_has_bad_taint(mod))
10845 return;
10846
10847 	/* Even if there are no trace_evals, this is needed to sanitize field types. */
10848 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10849 }
10850
10851 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10852 static void trace_module_remove_evals(struct module *mod)
10853 {
10854 union trace_eval_map_item *map;
10855 union trace_eval_map_item **last = &trace_eval_maps;
10856
10857 if (!mod->num_trace_evals)
10858 return;
10859
10860 guard(mutex)(&trace_eval_mutex);
10861
10862 map = trace_eval_maps;
10863
10864 while (map) {
10865 if (map->head.mod == mod)
10866 break;
10867 map = trace_eval_jmp_to_tail(map);
10868 last = &map->tail.next;
10869 map = map->tail.next;
10870 }
10871 if (!map)
10872 return;
10873
10874 *last = trace_eval_jmp_to_tail(map)->tail.next;
10875 kfree(map);
10876 }
10877 #else
10878 static inline void trace_module_remove_evals(struct module *mod) { }
10879 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10880
10881 static void trace_module_record(struct module *mod, bool add)
10882 {
10883 struct trace_array *tr;
10884 unsigned long flags;
10885
10886 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10887 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10888 /* Update any persistent trace array that has already been started */
10889 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10890 guard(mutex)(&scratch_mutex);
10891 save_mod(mod, tr);
10892 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10893 /* Update delta if the module loaded in previous boot */
10894 make_mod_delta(mod, tr);
10895 }
10896 }
10897 }
10898
10899 static int trace_module_notify(struct notifier_block *self,
10900 unsigned long val, void *data)
10901 {
10902 struct module *mod = data;
10903
10904 switch (val) {
10905 case MODULE_STATE_COMING:
10906 trace_module_add_evals(mod);
10907 trace_module_record(mod, true);
10908 break;
10909 case MODULE_STATE_GOING:
10910 trace_module_remove_evals(mod);
10911 trace_module_record(mod, false);
10912 break;
10913 }
10914
10915 return NOTIFY_OK;
10916 }
10917
10918 static struct notifier_block trace_module_nb = {
10919 .notifier_call = trace_module_notify,
10920 .priority = 0,
10921 };
10922 #endif /* CONFIG_MODULES */
10923
10924 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10925 {
10926
10927 event_trace_init();
10928
10929 init_tracer_tracefs(&global_trace, NULL);
10930 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10931
10932 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10933 &global_trace, &tracing_thresh_fops);
10934
10935 trace_create_file("README", TRACE_MODE_READ, NULL,
10936 NULL, &tracing_readme_fops);
10937
10938 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10939 NULL, &tracing_saved_cmdlines_fops);
10940
10941 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10942 NULL, &tracing_saved_cmdlines_size_fops);
10943
10944 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10945 NULL, &tracing_saved_tgids_fops);
10946
10947 trace_create_eval_file(NULL);
10948
10949 #ifdef CONFIG_MODULES
10950 register_module_notifier(&trace_module_nb);
10951 #endif
10952
10953 #ifdef CONFIG_DYNAMIC_FTRACE
10954 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10955 NULL, &tracing_dyn_info_fops);
10956 #endif
10957
10958 create_trace_instances(NULL);
10959
10960 update_tracer_options();
10961 }
10962
10963 static __init int tracer_init_tracefs(void)
10964 {
10965 int ret;
10966
10967 trace_access_lock_init();
10968
10969 ret = tracing_init_dentry();
10970 if (ret)
10971 return 0;
10972
10973 if (eval_map_wq) {
10974 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10975 queue_work(eval_map_wq, &tracerfs_init_work);
10976 } else {
10977 tracer_init_tracefs_work_func(NULL);
10978 }
10979
10980 if (rv_init_interface())
10981 pr_err("RV: Error while creating the RV interface\n");
10982
10983 return 0;
10984 }
10985
10986 fs_initcall(tracer_init_tracefs);
10987
10988 static int trace_die_panic_handler(struct notifier_block *self,
10989 unsigned long ev, void *unused);
10990
10991 static struct notifier_block trace_panic_notifier = {
10992 .notifier_call = trace_die_panic_handler,
10993 .priority = INT_MAX - 1,
10994 };
10995
10996 static struct notifier_block trace_die_notifier = {
10997 .notifier_call = trace_die_panic_handler,
10998 .priority = INT_MAX - 1,
10999 };
11000
11001 /*
11002 * The idea is to execute the following die/panic callback early, in order
11003 * to avoid showing irrelevant information in the trace (like other panic
11004 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
11005 * warnings get disabled (to prevent potential log flooding).
11006 */
11007 static int trace_die_panic_handler(struct notifier_block *self,
11008 unsigned long ev, void *unused)
11009 {
11010 if (!ftrace_dump_on_oops_enabled())
11011 return NOTIFY_DONE;
11012
11013 /* The die notifier requires DIE_OOPS to trigger */
11014 if (self == &trace_die_notifier && ev != DIE_OOPS)
11015 return NOTIFY_DONE;
11016
11017 ftrace_dump(DUMP_PARAM);
11018
11019 return NOTIFY_DONE;
11020 }
11021
11022 /*
11023 * printk is limited to a max of 1024 bytes; we really don't need it that big.
11024 * Nothing should be printing 1000 characters anyway.
11025 */
11026 #define TRACE_MAX_PRINT 1000
11027
11028 /*
11029 * Define here KERN_TRACE so that we have one place to modify
11030 * it if we decide to change what log level the ftrace dump
11031 * should be at.
11032 */
11033 #define KERN_TRACE KERN_EMERG
11034
11035 void
11036 trace_printk_seq(struct trace_seq *s)
11037 {
11038 /* Probably should print a warning here. */
11039 if (s->seq.len >= TRACE_MAX_PRINT)
11040 s->seq.len = TRACE_MAX_PRINT;
11041
11042 /*
11043 * More paranoid code. Although the buffer size is set to
11044 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
11045 * an extra layer of protection.
11046 */
11047 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
11048 s->seq.len = s->seq.size - 1;
11049
11050 /* Should already be NUL terminated, but we are paranoid. */
11051 s->buffer[s->seq.len] = 0;
11052
11053 printk(KERN_TRACE "%s", s->buffer);
11054
11055 trace_seq_init(s);
11056 }
11057
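/*
* Set up an iterator for dumping a trace array.  The iterator covers all
* CPUs of the array's buffer and uses the static temp/fmt buffers, since
* this path can run from panic/die context where kmalloc() is not safe.
*/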
11058 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
11059 {
11060 iter->tr = tr;
11061 iter->trace = iter->tr->current_trace;
11062 iter->cpu_file = RING_BUFFER_ALL_CPUS;
11063 iter->array_buffer = &tr->array_buffer;
11064
11065 if (iter->trace && iter->trace->open)
11066 iter->trace->open(iter);
11067
11068 /* Annotate start of buffers if we had overruns */
11069 if (ring_buffer_overruns(iter->array_buffer->buffer))
11070 iter->iter_flags |= TRACE_FILE_ANNOTATE;
11071
11072 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
11073 if (trace_clocks[iter->tr->clock_id].in_ns)
11074 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11075
11076 /* Can not use kmalloc for iter.temp and iter.fmt */
11077 iter->temp = static_temp_buf;
11078 iter->temp_size = STATIC_TEMP_BUF_SIZE;
11079 iter->fmt = static_fmt_buf;
11080 iter->fmt_size = STATIC_FMT_BUF_SIZE;
11081 }
11082
11083 void trace_init_global_iter(struct trace_iterator *iter)
11084 {
11085 trace_init_iter(iter, &global_trace);
11086 }
11087
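/*
* Dump the content of a trace array's ring buffer to the console via
* printk() at the KERN_TRACE level.  Tracing is turned off (it can be
* re-enabled with tracing_on) and the buffer is kept disabled while the
* dump runs; interrupts are disabled as this may be called from die or
* panic context.
*/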
11088 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11089 {
11090 /* use static because iter can be a bit big for the stack */
11091 static struct trace_iterator iter;
11092 unsigned int old_userobj;
11093 unsigned long flags;
11094 int cnt = 0;
11095
11096 /*
11097 * Always turn off tracing when we dump.
11098 * We don't need to show trace output of what happens
11099 * between multiple crashes.
11100 *
11101 * If the user does a sysrq-z, then they can re-enable
11102 * tracing with echo 1 > tracing_on.
11103 */
11104 tracer_tracing_off(tr);
11105
11106 local_irq_save(flags);
11107
11108 /* Simulate the iterator */
11109 trace_init_iter(&iter, tr);
11110
11111 /* While dumping, do not allow the buffer to be enabled */
11112 tracer_tracing_disable(tr);
11113
11114 old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11115
11116 /* don't look at user memory in panic mode */
11117 tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11118
11119 if (dump_mode == DUMP_ORIG)
11120 iter.cpu_file = raw_smp_processor_id();
11121 else
11122 iter.cpu_file = RING_BUFFER_ALL_CPUS;
11123
11124 if (tr == &global_trace)
11125 printk(KERN_TRACE "Dumping ftrace buffer:\n");
11126 else
11127 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11128
11129 /* Did function tracer already get disabled? */
11130 if (ftrace_is_dead()) {
11131 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11132 printk("# MAY BE MISSING FUNCTION EVENTS\n");
11133 }
11134
11135 /*
11136 * We need to stop all tracing on all CPUs to read
11137 * the next buffer. This is a bit expensive, but it is
11138 * not done often. We print everything that we can read,
11139 * and then release the locks again.
11140 */
11141
11142 while (!trace_empty(&iter)) {
11143
11144 if (!cnt)
11145 printk(KERN_TRACE "---------------------------------\n");
11146
11147 cnt++;
11148
11149 trace_iterator_reset(&iter);
11150 iter.iter_flags |= TRACE_FILE_LAT_FMT;
11151
11152 if (trace_find_next_entry_inc(&iter) != NULL) {
11153 int ret;
11154
11155 ret = print_trace_line(&iter);
11156 if (ret != TRACE_TYPE_NO_CONSUME)
11157 trace_consume(&iter);
11158
11159 trace_printk_seq(&iter.seq);
11160 }
11161 touch_nmi_watchdog();
11162 }
11163
11164 if (!cnt)
11165 printk(KERN_TRACE " (ftrace buffer empty)\n");
11166 else
11167 printk(KERN_TRACE "---------------------------------\n");
11168
11169 tr->trace_flags |= old_userobj;
11170
11171 tracer_tracing_enable(tr);
11172 local_irq_restore(flags);
11173 }
11174
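/*
* Dump buffers as selected by the ftrace_dump_on_oops parameter.  The
* value is a comma separated list: the first token may be "0", "1", "2"
* or "orig_cpu" and applies to the global buffer; the remaining tokens
* name instances, optionally suffixed with "=2" or "=orig_cpu" to dump
* only the CPU that triggered the dump.  Illustrative example (the
* instance name is made up):
*	ftrace_dump_on_oops=1,foo=orig_cpu
*/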
11175 static void ftrace_dump_by_param(void)
11176 {
11177 bool first_param = true;
11178 char dump_param[MAX_TRACER_SIZE];
11179 char *buf, *token, *inst_name;
11180 struct trace_array *tr;
11181
11182 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
11183 buf = dump_param;
11184
11185 while ((token = strsep(&buf, ",")) != NULL) {
11186 if (first_param) {
11187 first_param = false;
11188 if (!strcmp("0", token))
11189 continue;
11190 else if (!strcmp("1", token)) {
11191 ftrace_dump_one(&global_trace, DUMP_ALL);
11192 continue;
11193 }
11194 else if (!strcmp("2", token) ||
11195 !strcmp("orig_cpu", token)) {
11196 ftrace_dump_one(&global_trace, DUMP_ORIG);
11197 continue;
11198 }
11199 }
11200
11201 inst_name = strsep(&token, "=");
11202 tr = trace_array_find(inst_name);
11203 if (!tr) {
11204 printk(KERN_TRACE "Instance %s not found\n", inst_name);
11205 continue;
11206 }
11207
11208 if (token && (!strcmp("2", token) ||
11209 !strcmp("orig_cpu", token)))
11210 ftrace_dump_one(tr, DUMP_ORIG);
11211 else
11212 ftrace_dump_one(tr, DUMP_ALL);
11213 }
11214 }
11215
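/*
* ftrace_dump - dump the tracing buffers to the console
* @oops_dump_mode: dump all CPUs, only the originating CPU, or whatever
* the ftrace_dump_on_oops parameter selected (DUMP_PARAM)
*
* Only one dump may run at a time; concurrent callers simply return.
*/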
11216 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
11217 {
11218 static atomic_t dump_running;
11219
11220 /* Only allow one dump user at a time. */
11221 if (atomic_inc_return(&dump_running) != 1) {
11222 atomic_dec(&dump_running);
11223 return;
11224 }
11225
11226 switch (oops_dump_mode) {
11227 case DUMP_ALL:
11228 ftrace_dump_one(&global_trace, DUMP_ALL);
11229 break;
11230 case DUMP_ORIG:
11231 ftrace_dump_one(&global_trace, DUMP_ORIG);
11232 break;
11233 case DUMP_PARAM:
11234 ftrace_dump_by_param();
11235 break;
11236 case DUMP_NONE:
11237 break;
11238 default:
11239 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
11240 ftrace_dump_one(&global_trace, DUMP_ALL);
11241 }
11242
11243 atomic_dec(&dump_running);
11244 }
11245 EXPORT_SYMBOL_GPL(ftrace_dump);
11246
11247 #define WRITE_BUFSIZE 4096
11248
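/*
* trace_parse_run_command - run a callback on each line written by user space
*
* Copy the user buffer in chunks of at most WRITE_BUFSIZE - 1 bytes, split
* it into newline terminated commands, strip anything after a '#', and
* hand each command to createfn().  Returns the number of bytes consumed
* or a negative error, including the first error returned by createfn().
*/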
11249 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
11250 size_t count, loff_t *ppos,
11251 int (*createfn)(const char *))
11252 {
11253 char *kbuf __free(kfree) = NULL;
11254 char *buf, *tmp;
11255 int ret = 0;
11256 size_t done = 0;
11257 size_t size;
11258
11259 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
11260 if (!kbuf)
11261 return -ENOMEM;
11262
11263 while (done < count) {
11264 size = count - done;
11265
11266 if (size >= WRITE_BUFSIZE)
11267 size = WRITE_BUFSIZE - 1;
11268
11269 if (copy_from_user(kbuf, buffer + done, size))
11270 return -EFAULT;
11271
11272 kbuf[size] = '\0';
11273 buf = kbuf;
11274 do {
11275 tmp = strchr(buf, '\n');
11276 if (tmp) {
11277 *tmp = '\0';
11278 size = tmp - buf + 1;
11279 } else {
11280 size = strlen(buf);
11281 if (done + size < count) {
11282 if (buf != kbuf)
11283 break;
11284 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
11285 pr_warn("Line length is too long: Should be less than %d\n",
11286 WRITE_BUFSIZE - 2);
11287 return -EINVAL;
11288 }
11289 }
11290 done += size;
11291
11292 /* Remove comments */
11293 tmp = strchr(buf, '#');
11294
11295 if (tmp)
11296 *tmp = '\0';
11297
11298 ret = createfn(buf);
11299 if (ret)
11300 return ret;
11301 buf += size;
11302
11303 } while (done < count);
11304 }
11305 return done;
11306 }
11307
11308 #ifdef CONFIG_TRACER_MAX_TRACE
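/*
* Return true if @name was listed for a boot-time snapshot, i.e. if it
* appears in the tab separated boot_snapshot_info string built from the
* kernel command line.
*/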
11309 __init static bool tr_needs_alloc_snapshot(const char *name)
11310 {
11311 char *test;
11312 int len = strlen(name);
11313 bool ret;
11314
11315 if (!boot_snapshot_index)
11316 return false;
11317
11318 if (strncmp(name, boot_snapshot_info, len) == 0 &&
11319 boot_snapshot_info[len] == '\t')
11320 return true;
11321
11322 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
11323 if (!test)
11324 return false;
11325
11326 sprintf(test, "\t%s\t", name);
11327 ret = strstr(boot_snapshot_info, test) != NULL;
11328 kfree(test);
11329 return ret;
11330 }
11331
11332 __init static void do_allocate_snapshot(const char *name)
11333 {
11334 if (!tr_needs_alloc_snapshot(name))
11335 return;
11336
11337 /*
11338 * When allocate_snapshot is set, the next call to
11339 * allocate_trace_buffers() (called by trace_array_get_by_name())
11340 * will allocate the snapshot buffer. That will also clear
11341 * this flag.
11342 */
11343 allocate_snapshot = true;
11344 }
11345 #else
11346 static inline void do_allocate_snapshot(const char *name) { }
11347 #endif
11348
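/*
* Copy the memory region of an existing boot-mapped instance into freshly
* vzalloc()ed memory so that a new instance can be created from the copy.
* On success *addr and *size describe the copied area and zero is
* returned, otherwise a negative error code.
*/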
11349 __init static int backup_instance_area(const char *backup,
11350 unsigned long *addr, phys_addr_t *size)
11351 {
11352 struct trace_array *backup_tr;
11353 void *allocated_vaddr = NULL;
11354
11355 backup_tr = trace_array_get_by_name(backup, NULL);
11356 if (!backup_tr) {
11357 pr_warn("Tracing: Instance %s is not found.\n", backup);
11358 return -ENOENT;
11359 }
11360
11361 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
11362 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
11363 trace_array_put(backup_tr);
11364 return -EINVAL;
11365 }
11366
11367 *size = backup_tr->range_addr_size;
11368
11369 allocated_vaddr = vzalloc(*size);
11370 if (!allocated_vaddr) {
11371 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
11372 backup, (unsigned long)*size);
11373 trace_array_put(backup_tr);
11374 return -ENOMEM;
11375 }
11376
11377 memcpy(allocated_vaddr,
11378 (void *)backup_tr->range_addr_start, (size_t)*size);
11379 *addr = (unsigned long)allocated_vaddr;
11380
11381 trace_array_put(backup_tr);
11382 return 0;
11383 }
11384
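/*
* Create the tracing instances requested via trace_instance= on the kernel
* command line.  boot_instance_info holds one tab separated entry per
* instance: a name that may carry "^flag" suffixes (traceoff, traceprintk)
* and an "@start:size" or "@<reserve_mem name>" mapping, optionally
* followed by "=<instance>" to copy an existing boot-mapped instance, and
* then by a comma separated list of events to enable.  A purely
* illustrative example (names are made up):
*	trace_instance=foo^traceoff@0x1000000:2M,sched:sched_switch
*/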
11385 __init static void enable_instances(void)
11386 {
11387 struct trace_array *tr;
11388 bool memmap_area = false;
11389 char *curr_str;
11390 char *name;
11391 char *str;
11392 char *tok;
11393
11394 /* A tab is always appended */
11395 boot_instance_info[boot_instance_index - 1] = '\0';
11396 str = boot_instance_info;
11397
11398 while ((curr_str = strsep(&str, "\t"))) {
11399 phys_addr_t start = 0;
11400 phys_addr_t size = 0;
11401 unsigned long addr = 0;
11402 bool traceprintk = false;
11403 bool traceoff = false;
11404 char *flag_delim;
11405 char *addr_delim;
11406 char *rname __free(kfree) = NULL;
11407 char *backup;
11408
11409 tok = strsep(&curr_str, ",");
11410
11411 name = strsep(&tok, "=");
11412 backup = tok;
11413
11414 flag_delim = strchr(name, '^');
11415 addr_delim = strchr(name, '@');
11416
11417 if (addr_delim)
11418 *addr_delim++ = '\0';
11419
11420 if (flag_delim)
11421 *flag_delim++ = '\0';
11422
11423 if (backup) {
11424 if (backup_instance_area(backup, &addr, &size) < 0)
11425 continue;
11426 }
11427
11428 if (flag_delim) {
11429 char *flag;
11430
11431 while ((flag = strsep(&flag_delim, "^"))) {
11432 if (strcmp(flag, "traceoff") == 0) {
11433 traceoff = true;
11434 } else if ((strcmp(flag, "printk") == 0) ||
11435 (strcmp(flag, "traceprintk") == 0) ||
11436 (strcmp(flag, "trace_printk") == 0)) {
11437 traceprintk = true;
11438 } else {
11439 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
11440 flag, name);
11441 }
11442 }
11443 }
11444
11445 tok = addr_delim;
11446 if (tok && isdigit(*tok)) {
11447 start = memparse(tok, &tok);
11448 if (!start) {
11449 pr_warn("Tracing: Invalid boot instance address for %s\n",
11450 name);
11451 continue;
11452 }
11453 if (*tok != ':') {
11454 pr_warn("Tracing: No size specified for instance %s\n", name);
11455 continue;
11456 }
11457 tok++;
11458 size = memparse(tok, &tok);
11459 if (!size) {
11460 pr_warn("Tracing: Invalid boot instance size for %s\n",
11461 name);
11462 continue;
11463 }
11464 memmap_area = true;
11465 } else if (tok) {
11466 if (!reserve_mem_find_by_name(tok, &start, &size)) {
11467 start = 0;
11468 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
11469 continue;
11470 }
11471 rname = kstrdup(tok, GFP_KERNEL);
11472 }
11473
11474 if (start) {
11475 /* Start and size must be page aligned */
11476 if (start & ~PAGE_MASK) {
11477 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
11478 continue;
11479 }
11480 if (size & ~PAGE_MASK) {
11481 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
11482 continue;
11483 }
11484
11485 if (memmap_area)
11486 addr = map_pages(start, size);
11487 else
11488 addr = (unsigned long)phys_to_virt(start);
11489 if (addr) {
11490 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11491 name, &start, (unsigned long)size);
11492 } else {
11493 pr_warn("Tracing: Failed to map boot instance %s\n", name);
11494 continue;
11495 }
11496 } else {
11497 /* Only non-mapped buffers have snapshot buffers */
11498 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11499 do_allocate_snapshot(name);
11500 }
11501
11502 tr = trace_array_create_systems(name, NULL, addr, size);
11503 if (IS_ERR(tr)) {
11504 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
11505 continue;
11506 }
11507
11508 if (traceoff)
11509 tracer_tracing_off(tr);
11510
11511 if (traceprintk)
11512 update_printk_trace(tr);
11513
11514 /*
11515 * memmap'd buffers can not be freed.
11516 */
11517 if (memmap_area) {
11518 tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11519 tr->ref++;
11520 }
11521
11522 /*
11523 * Backup buffers can be freed but need vfree().
11524 */
11525 if (backup)
11526 tr->flags |= TRACE_ARRAY_FL_VMALLOC;
11527
11528 if (start || backup) {
11529 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11530 tr->range_name = no_free_ptr(rname);
11531 }
11532
11533 while ((tok = strsep(&curr_str, ","))) {
11534 early_enable_events(tr, tok, true);
11535 }
11536 }
11537 }
11538
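/*
* Early set up of the global trace array: cpumasks, the CPU hotplug
* prepare state, the temp buffer used by event triggers, the saved
* cmdlines buffer and the ring buffer itself.  Also registers the nop
* tracer and the panic/die notifiers.  Called from early_trace_init().
*/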
11539 __init static int tracer_alloc_buffers(void)
11540 {
11541 int ring_buf_size;
11542 int ret = -ENOMEM;
11543
11544
11545 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11546 pr_warn("Tracing disabled due to lockdown\n");
11547 return -EPERM;
11548 }
11549
11550 /*
11551 * Make sure we don't accidentally add more trace options
11552 * than we have bits for.
11553 */
11554 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11555
11556 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11557 return -ENOMEM;
11558
11559 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11560 goto out_free_buffer_mask;
11561
11562 /* Only allocate trace_printk buffers if a trace_printk exists */
11563 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11564 /* Must be called before global_trace.buffer is allocated */
11565 trace_printk_init_buffers();
11566
11567 /* To save memory, keep the ring buffer size to its minimum */
11568 if (global_trace.ring_buffer_expanded)
11569 ring_buf_size = trace_buf_size;
11570 else
11571 ring_buf_size = 1;
11572
11573 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11574 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11575
11576 raw_spin_lock_init(&global_trace.start_lock);
11577
11578 /*
11579 * The prepare callback allocates some memory for the ring buffer. We
11580 * don't free the buffer if the CPU goes down. If we were to free
11581 * the buffer, then the user would lose any trace that was in the
11582 * buffer. The memory will be removed once the "instance" is removed.
11583 */
11584 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11585 "trace/RB:prepare", trace_rb_cpu_prepare,
11586 NULL);
11587 if (ret < 0)
11588 goto out_free_cpumask;
11589 /* Used for event triggers */
11590 ret = -ENOMEM;
11591 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11592 if (!temp_buffer)
11593 goto out_rm_hp_state;
11594
11595 if (trace_create_savedcmd() < 0)
11596 goto out_free_temp_buffer;
11597
11598 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11599 goto out_free_savedcmd;
11600
11601 /* TODO: make the number of buffers hot pluggable with CPUs */
11602 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11603 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11604 goto out_free_pipe_cpumask;
11605 }
11606 if (global_trace.buffer_disabled)
11607 tracing_off();
11608
11609 if (trace_boot_clock) {
11610 ret = tracing_set_clock(&global_trace, trace_boot_clock);
11611 if (ret < 0)
11612 pr_warn("Trace clock %s not defined, going back to default\n",
11613 trace_boot_clock);
11614 }
11615
11616 /*
11617 * register_tracer() might reference current_trace, so it
11618 * needs to be set before we register anything. This is
11619 * just a bootstrap of current_trace anyway.
11620 */
11621 global_trace.current_trace = &nop_trace;
11622 global_trace.current_trace_flags = nop_trace.flags;
11623
11624 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11625 #ifdef CONFIG_TRACER_MAX_TRACE
11626 spin_lock_init(&global_trace.snapshot_trigger_lock);
11627 #endif
11628 ftrace_init_global_array_ops(&global_trace);
11629
11630 #ifdef CONFIG_MODULES
11631 INIT_LIST_HEAD(&global_trace.mod_events);
11632 #endif
11633
11634 init_trace_flags_index(&global_trace);
11635
11636 INIT_LIST_HEAD(&global_trace.tracers);
11637
11638 /* All seems OK, enable tracing */
11639 tracing_disabled = 0;
11640
11641 atomic_notifier_chain_register(&panic_notifier_list,
11642 &trace_panic_notifier);
11643
11644 register_die_notifier(&trace_die_notifier);
11645
11646 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11647
11648 global_trace.syscall_buf_sz = syscall_buf_size;
11649
11650 INIT_LIST_HEAD(&global_trace.systems);
11651 INIT_LIST_HEAD(&global_trace.events);
11652 INIT_LIST_HEAD(&global_trace.hist_vars);
11653 INIT_LIST_HEAD(&global_trace.err_log);
11654 list_add(&global_trace.marker_list, &marker_copies);
11655 list_add(&global_trace.list, &ftrace_trace_arrays);
11656
11657 register_tracer(&nop_trace);
11658
11659 /* Function tracing may start here (via kernel command line) */
11660 init_function_trace();
11661
11662 apply_trace_boot_options();
11663
11664 register_snapshot_cmd();
11665
11666 return 0;
11667
11668 out_free_pipe_cpumask:
11669 free_cpumask_var(global_trace.pipe_cpumask);
11670 out_free_savedcmd:
11671 trace_free_saved_cmdlines_buffer();
11672 out_free_temp_buffer:
11673 ring_buffer_free(temp_buffer);
11674 out_rm_hp_state:
11675 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11676 out_free_cpumask:
11677 free_cpumask_var(global_trace.tracing_cpumask);
11678 out_free_buffer_mask:
11679 free_cpumask_var(tracing_buffer_mask);
11680 return ret;
11681 }
11682
11683 #ifdef CONFIG_FUNCTION_TRACER
11684 /* Used to set module cached ftrace filtering at boot up */
11685 struct trace_array *trace_get_global_array(void)
11686 {
11687 return &global_trace;
11688 }
11689 #endif
11690
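/*
* If a snapshot was requested on the kernel command line (snapshot_at_boot),
* take a snapshot of every trace array that has a snapshot buffer allocated.
*/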
11691 void __init ftrace_boot_snapshot(void)
11692 {
11693 #ifdef CONFIG_TRACER_MAX_TRACE
11694 struct trace_array *tr;
11695
11696 if (!snapshot_at_boot)
11697 return;
11698
11699 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11700 if (!tr->allocated_snapshot)
11701 continue;
11702
11703 tracing_snapshot_instance(tr);
11704 trace_array_puts(tr, "** Boot snapshot taken **\n");
11705 }
11706 #endif
11707 }
11708
11709 void __init early_trace_init(void)
11710 {
11711 if (tracepoint_printk) {
11712 tracepoint_print_iter =
11713 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11714 if (MEM_FAIL(!tracepoint_print_iter,
11715 "Failed to allocate trace iterator\n"))
11716 tracepoint_printk = 0;
11717 else
11718 static_key_enable(&tracepoint_printk_key.key);
11719 }
11720 tracer_alloc_buffers();
11721
11722 init_events();
11723 }
11724
11725 void __init trace_init(void)
11726 {
11727 trace_event_init();
11728
11729 if (boot_instance_index)
11730 enable_instances();
11731 }
11732
11733 __init static void clear_boot_tracer(void)
11734 {
11735 /*
11736 * The default boot-up tracer string lives in an init section.
11737 * This function is called from a late initcall. If we did not
11738 * find the boot tracer, then clear it out, to prevent a
11739 * later registration from accessing the buffer that is
11740 * about to be freed.
11741 */
11742 if (!default_bootup_tracer)
11743 return;
11744
11745 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11746 default_bootup_tracer);
11747 default_bootup_tracer = NULL;
11748 }
11749
11750 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11751 __init static void tracing_set_default_clock(void)
11752 {
11753 /* sched_clock_stable() is determined in late_initcall */
11754 if (!trace_boot_clock && !sched_clock_stable()) {
11755 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11756 pr_warn("Can not set tracing clock due to lockdown\n");
11757 return;
11758 }
11759
11760 printk(KERN_WARNING
11761 "Unstable clock detected, switching default tracing clock to \"global\"\n"
11762 "If you want to keep using the local clock, then add:\n"
11763 " \"trace_clock=local\"\n"
11764 "on the kernel command line\n");
11765 tracing_set_clock(&global_trace, "global");
11766 }
11767 }
11768 #else
11769 static inline void tracing_set_default_clock(void) { }
11770 #endif
11771
11772 __init static int late_trace_init(void)
11773 {
11774 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11775 static_key_disable(&tracepoint_printk_key.key);
11776 tracepoint_printk = 0;
11777 }
11778
11779 if (traceoff_after_boot)
11780 tracing_off();
11781
11782 tracing_set_default_clock();
11783 clear_boot_tracer();
11784 return 0;
11785 }
11786
11787 late_initcall_sync(late_trace_init);
11788